Diffstat (limited to 'net/ipv4')
62 files changed, 2136 insertions(+), 1286 deletions(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1aa2dc9e380e..743f5542d65a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -94,6 +94,7 @@
 #include <linux/igmp.h>
 #include <linux/inetdevice.h>
 #include <linux/netdevice.h>
+#include <net/checksum.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/arp.h>
@@ -245,7 +246,7 @@ static inline int inet_netns_ok(struct net *net, int protocol)
	int hash;
	struct net_protocol *ipprot;
 
-	if (net == &init_net)
+	if (net_eq(net, &init_net))
		return 1;
 
	hash = protocol & (MAX_INET_PROTOS - 1);
@@ -272,10 +273,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol)
	int try_loading_module = 0;
	int err;
 
-	if (sock->type != SOCK_RAW &&
-	    sock->type != SOCK_DGRAM &&
-	    !inet_ehash_secret)
-		build_ehash_secret();
+	if (unlikely(!inet_ehash_secret))
+		if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
+			build_ehash_secret();
 
	sock->state = SS_UNCONNECTED;
 
@@ -1070,11 +1070,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
		return 0;
 
	if (sysctl_ip_dynaddr > 1) {
-		printk(KERN_INFO "%s(): shifting inet->"
-		       "saddr from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n",
-		       __func__,
-		       NIPQUAD(old_saddr),
-		       NIPQUAD(new_saddr));
+		printk(KERN_INFO "%s(): shifting inet->saddr from %pI4 to %pI4\n",
+		       __func__, &old_saddr, &new_saddr);
	}
 
	inet->saddr = inet->rcv_saddr = new_saddr;
@@ -1245,6 +1242,100 @@ out:
	return segs;
 }
 
+static struct sk_buff **inet_gro_receive(struct sk_buff **head,
+					 struct sk_buff *skb)
+{
+	struct net_protocol *ops;
+	struct sk_buff **pp = NULL;
+	struct sk_buff *p;
+	struct iphdr *iph;
+	int flush = 1;
+	int proto;
+	int id;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
+		goto out;
+
+	iph = ip_hdr(skb);
+	proto = iph->protocol & (MAX_INET_PROTOS - 1);
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet_protos[proto]);
+	if (!ops || !ops->gro_receive)
+		goto out_unlock;
+
+	if (iph->version != 4 || iph->ihl != 5)
+		goto out_unlock;
+
+	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+		goto out_unlock;
+
+	flush = ntohs(iph->tot_len) != skb->len ||
+		iph->frag_off != htons(IP_DF);
+	id = ntohs(iph->id);
+
+	for (p = *head; p; p = p->next) {
+		struct iphdr *iph2;
+
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		iph2 = ip_hdr(p);
+
+		if (iph->protocol != iph2->protocol ||
+		    iph->tos != iph2->tos ||
+		    memcmp(&iph->saddr, &iph2->saddr, 8)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
+		/* All fields must match except length and checksum. */
+		NAPI_GRO_CB(p)->flush |=
+			memcmp(&iph->frag_off, &iph2->frag_off, 4) ||
+			(u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id;
+
+		NAPI_GRO_CB(p)->flush |= flush;
+	}
+
+	NAPI_GRO_CB(skb)->flush |= flush;
+	__skb_pull(skb, sizeof(*iph));
+	skb_reset_transport_header(skb);
+
+	pp = ops->gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+
+static int inet_gro_complete(struct sk_buff *skb)
+{
+	struct net_protocol *ops;
+	struct iphdr *iph = ip_hdr(skb);
+	int proto = iph->protocol & (MAX_INET_PROTOS - 1);
+	int err = -ENOSYS;
+	__be16 newlen = htons(skb->len - skb_network_offset(skb));
+
+	csum_replace2(&iph->check, iph->tot_len, newlen);
+	iph->tot_len = newlen;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet_protos[proto]);
+	if (WARN_ON(!ops || !ops->gro_complete))
+		goto out_unlock;
+
+	err = ops->gro_complete(skb);
+
+out_unlock:
+	rcu_read_unlock();
+
+	return err;
+}
+
 int inet_ctl_sock_create(struct sock **sk, unsigned short family,
			 unsigned short type, unsigned char protocol,
			 struct net *net)
@@ -1311,6 +1402,7 @@ EXPORT_SYMBOL_GPL(snmp_mib_free);
 #ifdef CONFIG_IP_MULTICAST
 static struct net_protocol igmp_protocol = {
	.handler = igmp_rcv,
+	.netns_ok = 1,
 };
 #endif
 
@@ -1319,6 +1411,8 @@ static struct net_protocol tcp_protocol = {
	.err_handler = tcp_v4_err,
	.gso_send_check = tcp_v4_gso_send_check,
	.gso_segment = tcp_tso_segment,
+	.gro_receive = tcp4_gro_receive,
+	.gro_complete = tcp4_gro_complete,
	.no_policy = 1,
	.netns_ok = 1,
 };
@@ -1411,6 +1505,8 @@ static struct packet_type ip_packet_type = {
	.func = ip_rcv,
	.gso_send_check = inet_gso_send_check,
	.gso_segment = inet_gso_segment,
+	.gro_receive = inet_gro_receive,
+	.gro_complete = inet_gro_complete,
 };
 
 static int __init inet_init(void)
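
The new inet_gro_receive()/inet_gro_complete() pair above plugs IPv4 into the GRO path: a held packet and a newcomer merge only when they belong to the same flow (protocol, TOS, and both addresses, compared with a single 8-byte memcmp since saddr and daddr are adjacent in struct iphdr) and when the newcomer's IP ID equals the held ID plus the number of segments already merged; on completion, tot_len is rewritten and the header checksum patched incrementally via csum_replace2() rather than recomputed. A userspace sketch of the flow-match and ID-continuity test (illustrative names, not kernel code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Only the struct iphdr fields the GRO flow test looks at. */
struct iph_min {
	uint8_t  protocol;
	uint8_t  tos;
	uint16_t id;		/* host order for this sketch */
	uint32_t saddr;		/* adjacent, so one 8-byte memcmp */
	uint32_t daddr;		/* covers both addresses */
};

/*
 * "count" is how many segments are already merged into the held
 * packet; the next segment's IP ID must be exactly held_id + count,
 * which is what (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id
 * tests in the kernel code above.
 */
static int can_merge(const struct iph_min *held, int count,
		     const struct iph_min *in)
{
	if (held->protocol != in->protocol || held->tos != in->tos)
		return 0;
	if (memcmp(&held->saddr, &in->saddr, 8))
		return 0;
	return (uint16_t)(held->id + count) == in->id;
}

int main(void)
{
	struct iph_min held = { 6, 0, 1000, 0x0a000001, 0x0a000002 };
	struct iph_min next = held;

	next.id = 1002;		/* two segments already merged: ID in sequence */
	printf("merge: %d\n", can_merge(&held, 2, &next));	/* prints 1 */
	next.id = 1003;		/* ID gap: flow must be flushed, not merged */
	printf("merge: %d\n", can_merge(&held, 2, &next));	/* prints 0 */
	return 0;
}
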
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 8219b7e0968d..e878e494296e 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -201,15 +201,16 @@ out:
 
 static void ah4_err(struct sk_buff *skb, u32 info)
 {
-	struct iphdr *iph = (struct iphdr*)skb->data;
-	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));
+	struct net *net = dev_net(skb->dev);
+	struct iphdr *iph = (struct iphdr *)skb->data;
+	struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
	struct xfrm_state *x;
 
	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
		return;
 
-	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
+	x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
	if (!x)
		return;
	printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
@@ -293,9 +294,7 @@ static void ah_destroy(struct xfrm_state *x)
		return;
 
	kfree(ahp->work_icv);
-	ahp->work_icv = NULL;
	crypto_free_hash(ahp->tfm);
-	ahp->tfm = NULL;
	kfree(ahp);
 }
 
@@ -316,6 +315,7 @@ static struct net_protocol ah4_protocol = {
	.handler = xfrm4_rcv,
	.err_handler = ah4_err,
	.no_policy = 1,
+	.netns_ok = 1,
 };
 
 static int __init ah4_init(void)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1a9dd66511fc..29a74c01d8de 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -506,7 +506,7 @@ int arp_bind_neighbour(struct dst_entry *dst)
	if (dev == NULL)
		return -EINVAL;
	if (n == NULL) {
-		__be32 nexthop = ((struct rtable*)dst)->rt_gateway;
+		__be32 nexthop = ((struct rtable *)dst)->rt_gateway;
		if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT))
			nexthop = 0;
		n = __neigh_lookup_errno(
@@ -640,14 +640,14 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
	arp_ptr=(unsigned char *)(arp+1);
 
	memcpy(arp_ptr, src_hw, dev->addr_len);
-	arp_ptr+=dev->addr_len;
-	memcpy(arp_ptr, &src_ip,4);
-	arp_ptr+=4;
+	arp_ptr += dev->addr_len;
+	memcpy(arp_ptr, &src_ip, 4);
+	arp_ptr += 4;
	if (target_hw != NULL)
		memcpy(arp_ptr, target_hw, dev->addr_len);
	else
		memset(arp_ptr, 0, dev->addr_len);
-	arp_ptr+=dev->addr_len;
+	arp_ptr += dev->addr_len;
	memcpy(arp_ptr, &dest_ip, 4);
 
	return skb;
@@ -818,18 +818,18 @@ static int arp_process(struct sk_buff *skb)
	addr_type = rt->rt_type;
 
	if (addr_type == RTN_LOCAL) {
-		n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
-		if (n) {
-			int dont_send = 0;
-
-			if (!dont_send)
-				dont_send |= arp_ignore(in_dev,sip,tip);
-			if (!dont_send && IN_DEV_ARPFILTER(in_dev))
-				dont_send |= arp_filter(sip,tip,dev);
-			if (!dont_send)
-				arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
+		int dont_send = 0;
 
-			neigh_release(n);
+		if (!dont_send)
+			dont_send |= arp_ignore(in_dev,sip,tip);
+		if (!dont_send && IN_DEV_ARPFILTER(in_dev))
+			dont_send |= arp_filter(sip,tip,dev);
+		if (!dont_send) {
+			n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
+			if (n) {
+				arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
+				neigh_release(n);
+			}
		}
		goto out;
	} else if (IN_DEV_FORWARD(in_dev)) {
@@ -1308,7 +1308,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
 #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
	}
 #endif
-	sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->primary_key));
+	sprintf(tbuf, "%pI4", n->primary_key);
	seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
		   tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name);
	read_unlock(&n->lock);
@@ -1321,7 +1321,7 @@ static void arp_format_pneigh_entry(struct seq_file *seq,
	int hatype = dev ? dev->type : 0;
	char tbuf[16];
 
-	sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->key));
+	sprintf(tbuf, "%pI4", n->key);
	seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
		   tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00",
		   dev ? dev->name : "*");
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2e78f6bd9775..e52799047a5f 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -490,7 +490,6 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
	}
 
	atomic_set(&doi_def->refcount, 1);
-	INIT_RCU_HEAD(&doi_def->rcu);
 
	spin_lock(&cipso_v4_doi_list_lock);
	if (cipso_v4_doi_search(doi_def->doi) != NULL)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 56fce3ab6c55..309997edc8a5 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -112,13 +112,7 @@ static inline void devinet_sysctl_unregister(struct in_device *idev)
 
 static struct in_ifaddr *inet_alloc_ifa(void)
 {
-	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
-
-	if (ifa) {
-		INIT_RCU_HEAD(&ifa->rcu_head);
-	}
-
-	return ifa;
+	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 }
 
 static void inet_rcu_free_ifa(struct rcu_head *head)
@@ -161,7 +155,6 @@ static struct in_device *inetdev_init(struct net_device *dev)
	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
	if (!in_dev)
		goto out;
-	INIT_RCU_HEAD(&in_dev->rcu_head);
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
	       sizeof(in_dev->cnf));
	in_dev->cnf.sysctl = NULL;
@@ -1108,7 +1101,7 @@ out:
 }
 
 static struct notifier_block ip_netdev_notifier = {
-	.notifier_call =inetdev_event,
+	.notifier_call = inetdev_event,
 };
 
 static inline size_t inet_nlmsg_size(void)
@@ -1195,7 +1188,7 @@ done:
	return skb->len;
 }
 
-static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
+static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
		      u32 pid)
 {
	struct sk_buff *skb;
@@ -1262,7 +1255,7 @@ static void inet_forward_change(struct net *net)
 }
 
 static int devinet_conf_proc(ctl_table *ctl, int write,
-			     struct file* filp, void __user *buffer,
+			     struct file *filp, void __user *buffer,
			     size_t *lenp, loff_t *ppos)
 {
	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
@@ -1334,7 +1327,7 @@ static int devinet_conf_sysctl(ctl_table *table,
 }
 
 static int devinet_sysctl_forward(ctl_table *ctl, int write,
-				  struct file* filp, void __user *buffer,
+				  struct file *filp, void __user *buffer,
				  size_t *lenp, loff_t *ppos)
 {
	int *valp = ctl->data;
@@ -1363,7 +1356,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 }
 
 int ipv4_doint_and_flush(ctl_table *ctl, int write,
-			 struct file* filp, void __user *buffer,
+			 struct file *filp, void __user *buffer,
			 size_t *lenp, loff_t *ppos)
 {
	int *valp = ctl->data;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 21515d4c49eb..18bb383ea393 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -413,15 +413,16 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
 
 static void esp4_err(struct sk_buff *skb, u32 info)
 {
-	struct iphdr *iph = (struct iphdr*)skb->data;
-	struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
+	struct net *net = dev_net(skb->dev);
+	struct iphdr *iph = (struct iphdr *)skb->data;
+	struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
	struct xfrm_state *x;
 
	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
		return;
 
-	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
+	x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
	if (!x)
		return;
	NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
@@ -618,6 +619,7 @@ static struct net_protocol esp4_protocol = {
	.handler = xfrm4_rcv,
	.err_handler = esp4_err,
	.no_policy = 1,
+	.netns_ok = 1,
 };
 
 static int __init esp4_init(void)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 65c1503f8cc8..741e4fa3e474 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -578,7 +578,7 @@ errout:
	return err;
 }
 
-static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
	struct net *net = sock_net(skb->sk);
	struct fib_config cfg;
@@ -600,7 +600,7 @@ errout:
	return err;
 }
 
-static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
	struct net *net = sock_net(skb->sk);
	struct fib_config cfg;
@@ -903,7 +903,7 @@ static void fib_disable_ip(struct net_device *dev, int force)
 
 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
-	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
+	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
 
	switch (event) {
@@ -964,11 +964,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 }
 
 static struct notifier_block fib_inetaddr_notifier = {
-	.notifier_call =fib_inetaddr_event,
+	.notifier_call = fib_inetaddr_event,
 };
 
 static struct notifier_block fib_netdev_notifier = {
-	.notifier_call =fib_netdev_event,
+	.notifier_call = fib_netdev_event,
 };
 
 static int __net_init ip_fib_net_init(struct net *net)
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index c8cac6c7f881..ded8c44fb848 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -247,7 +247,7 @@ fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
 {
	int err;
	struct fn_zone *fz;
-	struct fn_hash *t = (struct fn_hash*)tb->tb_data;
+	struct fn_hash *t = (struct fn_hash *)tb->tb_data;
 
	read_lock(&fib_hash_lock);
	for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
@@ -283,7 +283,7 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
	struct fib_node *f;
	struct fib_info *fi = NULL;
	struct fib_info *last_resort;
-	struct fn_hash *t = (struct fn_hash*)tb->tb_data;
+	struct fn_hash *t = (struct fn_hash *)tb->tb_data;
	struct fn_zone *fz = t->fn_zones[0];
 
	if (fz == NULL)
@@ -548,7 +548,7 @@ out:
 
 static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
 {
-	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
+	struct fn_hash *table = (struct fn_hash *)tb->tb_data;
	struct fib_node *f;
	struct fib_alias *fa, *fa_to_delete;
	struct fn_zone *fz;
@@ -748,7 +748,7 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
 {
	int m, s_m;
	struct fn_zone *fz;
-	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
+	struct fn_hash *table = (struct fn_hash *)tb->tb_data;
 
	s_m = cb->args[2];
	read_lock(&fib_hash_lock);
@@ -845,10 +845,10 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
	struct hlist_node *node;
	struct fib_node *fn;
 
-	hlist_for_each_entry(fn,node,iter->hash_head,fn_hash) {
+	hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
		struct fib_alias *fa;
 
-		list_for_each_entry(fa,&fn->fn_alias,fa_list) {
+		list_for_each_entry(fa, &fn->fn_alias, fa_list) {
			iter->fn = fn;
			iter->fa = fa;
			goto out;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ded2ae34eab1..4817dea3bc73 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -63,16 +63,16 @@ static DEFINE_SPINLOCK(fib_multipath_lock);
 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
 
 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
-for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
+for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
 
 #else /* CONFIG_IP_ROUTE_MULTIPATH */
 
 /* Hope, that gcc will optimize it to get rid of dummy loop */
 
-#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
+#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \
 for (nhsel=0; nhsel < 1; nhsel++)
 
-#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
+#define change_nexthops(fi) { int nhsel = 0; struct fib_nh * nh = (struct fib_nh *)((fi)->fib_nh); \
 for (nhsel=0; nhsel < 1; nhsel++)
 
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -358,7 +358,7 @@ int fib_detect_death(struct fib_info *fi, int order,
		state = n->nud_state;
		neigh_release(n);
	}
-	if (state==NUD_REACHABLE)
+	if (state == NUD_REACHABLE)
		return 0;
	if ((state&NUD_VALID) && order != dflt)
		return 0;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5cb72786a8af..ec0ae490f0b6 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2399,8 +2399,8 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
		__be32 prf = htonl(mask_pfx(tn->key, tn->pos));
 
		seq_indent(seq, iter->depth-1);
-		seq_printf(seq, " +-- " NIPQUAD_FMT "/%d %d %d %d\n",
-			   NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
+		seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
+			   &prf, tn->pos, tn->bits, tn->full_children,
			   tn->empty_children);
 
	} else {
@@ -2410,7 +2410,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
		__be32 val = htonl(l->key);
 
		seq_indent(seq, iter->depth);
-		seq_printf(seq, " |-- " NIPQUAD_FMT "\n", NIPQUAD(val));
+		seq_printf(seq, " |-- %pI4\n", &val);
 
		hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
			struct fib_alias *fa;
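
Many hunks in this series (af_inet.c, arp.c, here, and icmp.c below) replace the old NIPQUAD_FMT/NIPQUAD() macro pair with the %pI4 printk extension. %pI4 takes a pointer to the address in network byte order, which is why the call sites now pass &prf, &val, or n->primary_key instead of four expanded bytes. A rough userspace equivalent of what the kernel's vsprintf does for %pI4 (names illustrative):

#include <stdint.h>
#include <stdio.h>

/* Format a network-byte-order IPv4 address, as printk's %pI4 does. */
static void print_ipv4(const void *addr)
{
	const uint8_t *p = addr;	/* big-endian: bytes already in order */

	printf("%u.%u.%u.%u", p[0], p[1], p[2], p[3]);
}

int main(void)
{
	uint32_t be_addr = 0x0100000a;	/* 10.0.0.1 on a little-endian host */

	print_ipv4(&be_addr);
	putchar('\n');
	return 0;
}

Besides being shorter at the call site, one pointer argument replaces four varargs, which shrinks the format strings and the generated code.
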
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 72b2de76f1cd..705b33b184a3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -321,12 +321,12 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
 }
 
 static void icmp_push_reply(struct icmp_bxm *icmp_param,
-			    struct ipcm_cookie *ipc, struct rtable *rt)
+			    struct ipcm_cookie *ipc, struct rtable **rt)
 {
	struct sock *sk;
	struct sk_buff *skb;
 
-	sk = icmp_sk(dev_net(rt->u.dst.dev));
+	sk = icmp_sk(dev_net((*rt)->u.dst.dev));
	if (ip_append_data(sk, icmp_glue_bits, icmp_param,
			   icmp_param->data_len+icmp_param->head_len,
			   icmp_param->head_len,
@@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
	}
	if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
			       icmp_param->data.icmph.code))
-		icmp_push_reply(icmp_param, &ipc, rt);
+		icmp_push_reply(icmp_param, &ipc, &rt);
	ip_rt_put(rt);
 out_unlock:
	icmp_xmit_unlock(sk);
@@ -562,7 +562,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
		/* No need to clone since we're just using its address. */
		rt2 = rt;
 
-		err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
+		err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
		switch (err) {
		case 0:
			if (rt != rt2)
@@ -601,7 +601,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
		if (err)
			goto relookup_failed;
 
-		err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL,
+		err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL,
				  XFRM_LOOKUP_ICMP);
		switch (err) {
		case 0:
@@ -635,7 +635,7 @@ route_done:
	icmp_param.data_len = room;
	icmp_param.head_len = sizeof(struct icmphdr);
 
-	icmp_push_reply(&icmp_param, &ipc, rt);
+	icmp_push_reply(&icmp_param, &ipc, &rt);
 ende:
	ip_rt_put(rt);
 out_unlock:
@@ -683,10 +683,8 @@ static void icmp_unreach(struct sk_buff *skb)
		break;
	case ICMP_FRAG_NEEDED:
		if (ipv4_config.no_pmtu_disc) {
-			LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": "
-						 "fragmentation needed "
-						 "and DF set.\n",
-				       NIPQUAD(iph->daddr));
+			LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: fragmentation needed and DF set.\n",
+				       &iph->daddr);
		} else {
			info = ip_rt_frag_needed(net, iph,
						 ntohs(icmph->un.frag.mtu),
@@ -696,9 +694,8 @@ static void icmp_unreach(struct sk_buff *skb)
		}
		break;
	case ICMP_SR_FAILED:
-		LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": Source "
-					 "Route Failed.\n",
-			       NIPQUAD(iph->daddr));
+		LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: Source Route Failed.\n",
+			       &iph->daddr);
		break;
	default:
		break;
@@ -729,12 +726,12 @@ static void icmp_unreach(struct sk_buff *skb)
	if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
	    inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
		if (net_ratelimit())
-			printk(KERN_WARNING NIPQUAD_FMT " sent an invalid ICMP "
+			printk(KERN_WARNING "%pI4 sent an invalid ICMP "
					    "type %u, code %u "
-					    "error to a broadcast: " NIPQUAD_FMT " on %s\n",
-			       NIPQUAD(ip_hdr(skb)->saddr),
+					    "error to a broadcast: %pI4 on %s\n",
+			       &ip_hdr(skb)->saddr,
			       icmph->type, icmph->code,
-			       NIPQUAD(iph->daddr),
+			       &iph->daddr,
			       skb->dev->name);
		goto out;
	}
@@ -952,9 +949,8 @@ static void icmp_address_reply(struct sk_buff *skb)
				break;
		}
		if (!ifa && net_ratelimit()) {
-			printk(KERN_INFO "Wrong address mask " NIPQUAD_FMT " from "
-					 "%s/" NIPQUAD_FMT "\n",
-			       NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src));
+			printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n",
+			       mp, dev->name, &rt->rt_src);
		}
	}
	rcu_read_unlock();
@@ -976,9 +972,10 @@ int icmp_rcv(struct sk_buff *skb)
	struct net *net = dev_net(rt->u.dst.dev);
 
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+		struct sec_path *sp = skb_sec_path(skb);
		int nh;
 
-		if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
+		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
		      XFRM_STATE_ICMP))
			goto drop;
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a0d86455c53e..9eb6219af615 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -167,7 +167,7 @@ static __inline__ void igmp_stop_timer(struct ip_mc_list *im)
	spin_lock_bh(&im->lock);
	if (del_timer(&im->timer))
		atomic_dec(&im->refcnt);
-	im->tm_running=0;
+	im->tm_running = 0;
	im->reporter = 0;
	im->unsolicit_count = 0;
	spin_unlock_bh(&im->lock);
@@ -176,9 +176,9 @@ static __inline__ void igmp_stop_timer(struct ip_mc_list *im)
 /* It must be called with locked im->lock */
 static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
 {
-	int tv=net_random() % max_delay;
+	int tv = net_random() % max_delay;
 
-	im->tm_running=1;
+	im->tm_running = 1;
	if (!mod_timer(&im->timer, jiffies+tv+2))
		atomic_inc(&im->refcnt);
 }
@@ -207,7 +207,7 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay)
	if (del_timer(&im->timer)) {
		if ((long)(im->timer.expires-jiffies) < max_delay) {
			add_timer(&im->timer);
-			im->tm_running=1;
+			im->tm_running = 1;
			spin_unlock_bh(&im->lock);
			return;
		}
@@ -358,7 +358,7 @@ static int igmpv3_sendpack(struct sk_buff *skb)
 
 static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
 {
-	return sizeof(struct igmpv3_grec) + 4*igmp_scount(pmc,type,gdel,sdel);
+	return sizeof(struct igmpv3_grec) + 4*igmp_scount(pmc, type, gdel, sdel);
 }
 
 static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
@@ -653,7 +653,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
		return -1;
	}
 
-	skb=alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
+	skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
	if (skb == NULL) {
		ip_rt_put(rt);
		return -1;
@@ -682,11 +682,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
	((u8*)&iph[1])[3] = 0;
 
	ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
-	ih->type=type;
-	ih->code=0;
-	ih->csum=0;
-	ih->group=group;
-	ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr));
+	ih->type = type;
+	ih->code = 0;
+	ih->csum = 0;
+	ih->group = group;
+	ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr));
 
	return ip_local_out(skb);
 }
@@ -728,7 +728,7 @@ static void igmp_timer_expire(unsigned long data)
	struct in_device *in_dev = im->interface;
 
	spin_lock(&im->lock);
-	im->tm_running=0;
+	im->tm_running = 0;
 
	if (im->unsolicit_count) {
		im->unsolicit_count--;
@@ -997,7 +997,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
	   --ANK
	   */
	if (arp_mc_map(addr, buf, dev, 0) == 0)
-		dev_mc_add(dev,buf,dev->addr_len,0);
+		dev_mc_add(dev, buf, dev->addr_len, 0);
 }
 
 /*
@@ -1010,7 +1010,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
	struct net_device *dev = in_dev->dev;
 
	if (arp_mc_map(addr, buf, dev, 0) == 0)
-		dev_mc_delete(dev,buf,dev->addr_len,0);
+		dev_mc_delete(dev, buf, dev->addr_len, 0);
 }
 
 #ifdef CONFIG_IP_MULTICAST
@@ -1210,10 +1210,10 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
	if (!im)
		goto out;
 
-	im->users=1;
-	im->interface=in_dev;
+	im->users = 1;
+	im->interface = in_dev;
	in_dev_hold(in_dev);
-	im->multiaddr=addr;
+	im->multiaddr = addr;
	/* initial mode is (EX, empty) */
	im->sfmode = MCAST_EXCLUDE;
	im->sfcount[MCAST_INCLUDE] = 0;
@@ -1224,7 +1224,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
	atomic_set(&im->refcnt, 1);
	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
-	im->tm_running=0;
+	im->tm_running = 0;
	setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
	im->unsolicit_count = IGMP_Unsolicited_Report_Count;
	im->reporter = 0;
@@ -1232,8 +1232,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 #endif
	im->loaded = 0;
	write_lock_bh(&in_dev->mc_list_lock);
-	im->next=in_dev->mc_list;
-	in_dev->mc_list=im;
+	im->next = in_dev->mc_list;
+	in_dev->mc_list = im;
	in_dev->mc_count++;
	write_unlock_bh(&in_dev->mc_list_lock);
 #ifdef CONFIG_IP_MULTICAST
@@ -1279,7 +1279,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
	ASSERT_RTNL();
 
	for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
-		if (i->multiaddr==addr) {
+		if (i->multiaddr == addr) {
			if (--i->users == 0) {
				write_lock_bh(&in_dev->mc_list_lock);
				*ip = i->next;
@@ -1738,7 +1738,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 {
	int err;
	__be32 addr = imr->imr_multiaddr.s_addr;
-	struct ip_mc_socklist *iml=NULL, *i;
+	struct ip_mc_socklist *iml = NULL, *i;
	struct in_device *in_dev;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
@@ -1769,7 +1769,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
	err = -ENOBUFS;
	if (count >= sysctl_igmp_max_memberships)
		goto done;
-	iml = sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL);
+	iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
	if (iml == NULL)
		goto done;
 
@@ -2275,6 +2275,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
 
 #if defined(CONFIG_PROC_FS)
 struct igmp_mc_iter_state {
+	struct seq_net_private p;
	struct net_device *dev;
	struct in_device *in_dev;
 };
@@ -2283,11 +2284,12 @@ struct igmp_mc_iter_state {
 
 static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
	struct ip_mc_list *im = NULL;
	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
 
	state->in_dev = NULL;
-	for_each_netdev(&init_net, state->dev) {
+	for_each_netdev(net, state->dev) {
		struct in_device *in_dev;
		in_dev = in_dev_get(state->dev);
		if (!in_dev)
@@ -2408,7 +2410,7 @@ static const struct seq_operations igmp_mc_seq_ops = {
 
 static int igmp_mc_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &igmp_mc_seq_ops,
+	return seq_open_net(inode, file, &igmp_mc_seq_ops,
			sizeof(struct igmp_mc_iter_state));
 }
 
@@ -2417,10 +2419,11 @@ static const struct file_operations igmp_mc_seq_fops = {
	.open = igmp_mc_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 
 struct igmp_mcf_iter_state {
+	struct seq_net_private p;
	struct net_device *dev;
	struct in_device *idev;
	struct ip_mc_list *im;
@@ -2430,13 +2433,14 @@ struct igmp_mcf_iter_state {
 
 static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
	struct ip_sf_list *psf = NULL;
	struct ip_mc_list *im = NULL;
	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
 
	state->idev = NULL;
	state->im = NULL;
-	for_each_netdev(&init_net, state->dev) {
+	for_each_netdev(net, state->dev) {
		struct in_device *idev;
		idev = in_dev_get(state->dev);
		if (unlikely(idev == NULL))
@@ -2567,7 +2571,7 @@ static const struct seq_operations igmp_mcf_seq_ops = {
 
 static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &igmp_mcf_seq_ops,
+	return seq_open_net(inode, file, &igmp_mcf_seq_ops,
			sizeof(struct igmp_mcf_iter_state));
 }
 
@@ -2576,14 +2580,41 @@ static const struct file_operations igmp_mcf_seq_fops = {
	.open = igmp_mcf_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 
-int __init igmp_mc_proc_init(void)
+static int igmp_net_init(struct net *net)
 {
-	proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
-	proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	struct proc_dir_entry *pde;
+
+	pde = proc_net_fops_create(net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
+	if (!pde)
+		goto out_igmp;
+	pde = proc_net_fops_create(net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	if (!pde)
+		goto out_mcfilter;
	return 0;
+
+out_mcfilter:
+	proc_net_remove(net, "igmp");
+out_igmp:
+	return -ENOMEM;
+}
+
+static void igmp_net_exit(struct net *net)
+{
+	proc_net_remove(net, "mcfilter");
+	proc_net_remove(net, "igmp");
+}
+
+static struct pernet_operations igmp_net_ops = {
+	.init = igmp_net_init,
+	.exit = igmp_net_exit,
+};
+
+int __init igmp_mc_proc_init(void)
+{
+	return register_pernet_subsys(&igmp_net_ops);
 }
 #endif
 
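
igmp_net_init()/igmp_net_exit() above turn the global /proc/net/igmp and /proc/net/mcfilter files into per-namespace ones: register_pernet_subsys() runs the init hook for every existing and future struct net, and the init hook unwinds partial setup with the kernel's usual goto-unwind style, where each failure label undoes exactly the steps that already succeeded. A minimal userspace illustration of that error-unwinding pattern (all names hypothetical):

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for two resources created in order, torn down in reverse. */
static void *create(const char *name)
{
	printf("create %s\n", name);
	return malloc(1);
}

static void destroy(void *r, const char *name)
{
	printf("destroy %s\n", name);
	free(r);
}

static int subsys_init(void)
{
	void *a, *b;

	a = create("igmp");
	if (!a)
		goto out;
	b = create("mcfilter");
	if (!b)
		goto out_a;	/* second step failed: undo only the first */
	return 0;		/* success: both resources stay registered */

out_a:
	destroy(a, "igmp");
out:
	return -1;
}

int main(void)
{
	return subsys_init() ? EXIT_FAILURE : EXIT_SUCCESS;
}
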
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index bd1278a2d828..c7cda1ca8e65 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -109,7 +109,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
					hashinfo->bhash_size)];
			spin_lock(&head->lock);
			inet_bind_bucket_for_each(tb, node, &head->chain)
-				if (tb->ib_net == net && tb->port == rover)
+				if (ib_net(tb) == net && tb->port == rover)
					goto next;
			break;
		next:
@@ -137,7 +137,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
				hashinfo->bhash_size)];
		spin_lock(&head->lock);
		inet_bind_bucket_for_each(tb, node, &head->chain)
-			if (tb->ib_net == net && tb->port == snum)
+			if (ib_net(tb) == net && tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
@@ -323,7 +323,7 @@ void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
 
 EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
 
-struct dst_entry* inet_csk_route_req(struct sock *sk,
+struct dst_entry *inet_csk_route_req(struct sock *sk,
				     const struct request_sock *req)
 {
	struct rtable *rt;
@@ -344,16 +344,17 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
	struct net *net = sock_net(sk);
 
	security_req_classify_flow(req, &fl);
-	if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
-		IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
-		return NULL;
-	}
-	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
-		ip_rt_put(rt);
-		IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
-		return NULL;
-	}
+	if (ip_route_output_flow(net, &rt, &fl, sk, 0))
+		goto no_route;
+	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
+		goto route_err;
	return &rt->u.dst;
+
+route_err:
+	ip_rt_put(rt);
+no_route:
+	IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+	return NULL;
 }
 
 EXPORT_SYMBOL_GPL(inet_csk_route_req);
@@ -561,7 +562,7 @@ void inet_csk_destroy_sock(struct sock *sk)
 
	sk_refcnt_debug_release(sk);
 
-	atomic_dec(sk->sk_prot->orphan_count);
+	percpu_counter_dec(sk->sk_prot->orphan_count);
	sock_put(sk);
 }
 
@@ -632,6 +633,8 @@ void inet_csk_listen_stop(struct sock *sk)
 
		acc_req = req->dl_next;
 
+		percpu_counter_inc(sk->sk_prot->orphan_count);
+
		local_bh_disable();
		bh_lock_sock(child);
		WARN_ON(sock_owned_by_user(child));
@@ -641,8 +644,6 @@ void inet_csk_listen_stop(struct sock *sk)
 
		sock_orphan(child);
 
-		atomic_inc(sk->sk_prot->orphan_count);
-
		inet_csk_destroy_sock(child);
 
		bh_unlock_sock(child);
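
The hunks above switch the protocol's orphan_count from a shared atomic_t to a percpu_counter: each CPU mostly updates a local slot, so the hot inc/dec path stops bouncing one cache line across all CPUs, while readers pay by summing the slots for an approximate total. A sketch of the idea using per-"CPU" slots and C11 atomics (illustrative only, not the kernel implementation):

#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS 4

/* One slot per CPU; a writer touches only its own slot. */
struct percpu_counter {
	atomic_long slot[NR_CPUS];
};

static void pcpu_inc(struct percpu_counter *c, int cpu)
{
	atomic_fetch_add_explicit(&c->slot[cpu], 1, memory_order_relaxed);
}

static void pcpu_dec(struct percpu_counter *c, int cpu)
{
	atomic_fetch_sub_explicit(&c->slot[cpu], 1, memory_order_relaxed);
}

/* Readers pay the cost: sum every slot for an approximate total. */
static long pcpu_sum(struct percpu_counter *c)
{
	long sum = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		sum += atomic_load_explicit(&c->slot[cpu], memory_order_relaxed);
	return sum;
}

int main(void)
{
	struct percpu_counter orphans = { 0 };

	pcpu_inc(&orphans, 0);
	pcpu_inc(&orphans, 2);
	pcpu_dec(&orphans, 3);
	printf("orphans ~= %ld\n", pcpu_sum(&orphans));	/* prints 1 */
	return 0;
}

This is a good fit for orphan_count, which is incremented and decremented constantly but only compared against limits, where a slightly stale read is harmless.
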
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 564230dabcb8..588a7796e3e3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -718,13 +718,15 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
718 | if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) | 718 | if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) |
719 | goto skip_listen_ht; | 719 | goto skip_listen_ht; |
720 | 720 | ||
721 | inet_listen_lock(hashinfo); | ||
722 | for (i = s_i; i < INET_LHTABLE_SIZE; i++) { | 721 | for (i = s_i; i < INET_LHTABLE_SIZE; i++) { |
723 | struct sock *sk; | 722 | struct sock *sk; |
724 | struct hlist_node *node; | 723 | struct hlist_nulls_node *node; |
724 | struct inet_listen_hashbucket *ilb; | ||
725 | 725 | ||
726 | num = 0; | 726 | num = 0; |
727 | sk_for_each(sk, node, &hashinfo->listening_hash[i]) { | 727 | ilb = &hashinfo->listening_hash[i]; |
728 | spin_lock_bh(&ilb->lock); | ||
729 | sk_nulls_for_each(sk, node, &ilb->head) { | ||
728 | struct inet_sock *inet = inet_sk(sk); | 730 | struct inet_sock *inet = inet_sk(sk); |
729 | 731 | ||
730 | if (num < s_num) { | 732 | if (num < s_num) { |
@@ -742,7 +744,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
742 | goto syn_recv; | 744 | goto syn_recv; |
743 | 745 | ||
744 | if (inet_csk_diag_dump(sk, skb, cb) < 0) { | 746 | if (inet_csk_diag_dump(sk, skb, cb) < 0) { |
745 | inet_listen_unlock(hashinfo); | 747 | spin_unlock_bh(&ilb->lock); |
746 | goto done; | 748 | goto done; |
747 | } | 749 | } |
748 | 750 | ||
@@ -751,7 +753,7 @@ syn_recv: | |||
751 | goto next_listen; | 753 | goto next_listen; |
752 | 754 | ||
753 | if (inet_diag_dump_reqs(skb, sk, cb) < 0) { | 755 | if (inet_diag_dump_reqs(skb, sk, cb) < 0) { |
754 | inet_listen_unlock(hashinfo); | 756 | spin_unlock_bh(&ilb->lock); |
755 | goto done; | 757 | goto done; |
756 | } | 758 | } |
757 | 759 | ||
@@ -760,12 +762,12 @@ next_listen: | |||
760 | cb->args[4] = 0; | 762 | cb->args[4] = 0; |
761 | ++num; | 763 | ++num; |
762 | } | 764 | } |
765 | spin_unlock_bh(&ilb->lock); | ||
763 | 766 | ||
764 | s_num = 0; | 767 | s_num = 0; |
765 | cb->args[3] = 0; | 768 | cb->args[3] = 0; |
766 | cb->args[4] = 0; | 769 | cb->args[4] = 0; |
767 | } | 770 | } |
768 | inet_listen_unlock(hashinfo); | ||
769 | skip_listen_ht: | 771 | skip_listen_ht: |
770 | cb->args[0] = 1; | 772 | cb->args[0] = 1; |
771 | s_i = num = s_num = 0; | 773 | s_i = num = s_num = 0; |
@@ -776,20 +778,21 @@ skip_listen_ht: | |||
776 | 778 | ||
777 | for (i = s_i; i < hashinfo->ehash_size; i++) { | 779 | for (i = s_i; i < hashinfo->ehash_size; i++) { |
778 | struct inet_ehash_bucket *head = &hashinfo->ehash[i]; | 780 | struct inet_ehash_bucket *head = &hashinfo->ehash[i]; |
779 | rwlock_t *lock = inet_ehash_lockp(hashinfo, i); | 781 | spinlock_t *lock = inet_ehash_lockp(hashinfo, i); |
780 | struct sock *sk; | 782 | struct sock *sk; |
781 | struct hlist_node *node; | 783 | struct hlist_nulls_node *node; |
782 | 784 | ||
783 | num = 0; | 785 | num = 0; |
784 | 786 | ||
785 | if (hlist_empty(&head->chain) && hlist_empty(&head->twchain)) | 787 | if (hlist_nulls_empty(&head->chain) && |
788 | hlist_nulls_empty(&head->twchain)) | ||
786 | continue; | 789 | continue; |
787 | 790 | ||
788 | if (i > s_i) | 791 | if (i > s_i) |
789 | s_num = 0; | 792 | s_num = 0; |
790 | 793 | ||
791 | read_lock_bh(lock); | 794 | spin_lock_bh(lock); |
792 | sk_for_each(sk, node, &head->chain) { | 795 | sk_nulls_for_each(sk, node, &head->chain) { |
793 | struct inet_sock *inet = inet_sk(sk); | 796 | struct inet_sock *inet = inet_sk(sk); |
794 | 797 | ||
795 | if (num < s_num) | 798 | if (num < s_num) |
@@ -803,7 +806,7 @@ skip_listen_ht: | |||
803 | r->id.idiag_dport) | 806 | r->id.idiag_dport) |
804 | goto next_normal; | 807 | goto next_normal; |
805 | if (inet_csk_diag_dump(sk, skb, cb) < 0) { | 808 | if (inet_csk_diag_dump(sk, skb, cb) < 0) { |
806 | read_unlock_bh(lock); | 809 | spin_unlock_bh(lock); |
807 | goto done; | 810 | goto done; |
808 | } | 811 | } |
809 | next_normal: | 812 | next_normal: |
@@ -825,14 +828,14 @@ next_normal: | |||
825 | r->id.idiag_dport) | 828 | r->id.idiag_dport) |
826 | goto next_dying; | 829 | goto next_dying; |
827 | if (inet_twsk_diag_dump(tw, skb, cb) < 0) { | 830 | if (inet_twsk_diag_dump(tw, skb, cb) < 0) { |
828 | read_unlock_bh(lock); | 831 | spin_unlock_bh(lock); |
829 | goto done; | 832 | goto done; |
830 | } | 833 | } |
831 | next_dying: | 834 | next_dying: |
832 | ++num; | 835 | ++num; |
833 | } | 836 | } |
834 | } | 837 | } |
835 | read_unlock_bh(lock); | 838 | spin_unlock_bh(lock); |
836 | } | 839 | } |
837 | 840 | ||
838 | done: | 841 | done: |
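
The inet_diag walk above depends on the hashtable rework in the next file: the single inet_listen_lock()/inet_listen_unlock() pair is gone, replaced by one spinlock per listening bucket, so the dump now locks and unlocks each chain individually. A condensed sketch of the new walk shape (kernel-style, not a drop-in; dump_one() is a hypothetical stand-in for the real netlink output step):

    static int walk_listeners(struct inet_hashinfo *hashinfo)
    {
    	int i;

    	for (i = 0; i < INET_LHTABLE_SIZE; i++) {
    		struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[i];
    		struct sock *sk;
    		struct hlist_nulls_node *node;

    		spin_lock_bh(&ilb->lock);	/* contention is per chain, not global */
    		sk_nulls_for_each(sk, node, &ilb->head) {
    			if (dump_one(sk) < 0) {
    				spin_unlock_bh(&ilb->lock);
    				return -1;	/* resume later from cb->args[] */
    			}
    		}
    		spin_unlock_bh(&ilb->lock);
    	}
    	return 0;
    }
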
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 44981906fb91..6a1045da48d2 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -35,7 +35,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, | |||
35 | struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); | 35 | struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); |
36 | 36 | ||
37 | if (tb != NULL) { | 37 | if (tb != NULL) { |
38 | tb->ib_net = hold_net(net); | 38 | write_pnet(&tb->ib_net, hold_net(net)); |
39 | tb->port = snum; | 39 | tb->port = snum; |
40 | tb->fastreuse = 0; | 40 | tb->fastreuse = 0; |
41 | INIT_HLIST_HEAD(&tb->owners); | 41 | INIT_HLIST_HEAD(&tb->owners); |
@@ -51,7 +51,7 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket | |||
51 | { | 51 | { |
52 | if (hlist_empty(&tb->owners)) { | 52 | if (hlist_empty(&tb->owners)) { |
53 | __hlist_del(&tb->node); | 53 | __hlist_del(&tb->node); |
54 | release_net(tb->ib_net); | 54 | release_net(ib_net(tb)); |
55 | kmem_cache_free(cachep, tb); | 55 | kmem_cache_free(cachep, tb); |
56 | } | 56 | } |
57 | } | 57 | } |
@@ -110,33 +110,29 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) | |||
110 | 110 | ||
111 | EXPORT_SYMBOL_GPL(__inet_inherit_port); | 111 | EXPORT_SYMBOL_GPL(__inet_inherit_port); |
112 | 112 | ||
113 | /* | 113 | static inline int compute_score(struct sock *sk, struct net *net, |
114 | * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. | 114 | const unsigned short hnum, const __be32 daddr, |
115 | * Look, when several writers sleep and reader wakes them up, all but one | 115 | const int dif) |
116 | * immediately hit write lock and grab all the cpus. Exclusive sleep solves | ||
117 | * this, _but_ remember, it adds useless work on UP machines (wake up each | ||
118 | * exclusive lock release). It should be ifdefed really. | ||
119 | */ | ||
120 | void inet_listen_wlock(struct inet_hashinfo *hashinfo) | ||
121 | __acquires(hashinfo->lhash_lock) | ||
122 | { | 116 | { |
123 | write_lock(&hashinfo->lhash_lock); | 117 | int score = -1; |
124 | 118 | struct inet_sock *inet = inet_sk(sk); | |
125 | if (atomic_read(&hashinfo->lhash_users)) { | ||
126 | DEFINE_WAIT(wait); | ||
127 | 119 | ||
128 | for (;;) { | 120 | if (net_eq(sock_net(sk), net) && inet->num == hnum && |
129 | prepare_to_wait_exclusive(&hashinfo->lhash_wait, | 121 | !ipv6_only_sock(sk)) { |
130 | &wait, TASK_UNINTERRUPTIBLE); | 122 | __be32 rcv_saddr = inet->rcv_saddr; |
131 | if (!atomic_read(&hashinfo->lhash_users)) | 123 | score = sk->sk_family == PF_INET ? 1 : 0; |
132 | break; | 124 | if (rcv_saddr) { |
133 | write_unlock_bh(&hashinfo->lhash_lock); | 125 | if (rcv_saddr != daddr) |
134 | schedule(); | 126 | return -1; |
135 | write_lock_bh(&hashinfo->lhash_lock); | 127 | score += 2; |
128 | } | ||
129 | if (sk->sk_bound_dev_if) { | ||
130 | if (sk->sk_bound_dev_if != dif) | ||
131 | return -1; | ||
132 | score += 2; | ||
136 | } | 133 | } |
137 | |||
138 | finish_wait(&hashinfo->lhash_wait, &wait); | ||
139 | } | 134 | } |
135 | return score; | ||
140 | } | 136 | } |
141 | 137 | ||
142 | /* | 138 | /* |
@@ -145,72 +141,48 @@ void inet_listen_wlock(struct inet_hashinfo *hashinfo) | |||
145 | * remote address for the connection. So always assume those are both | 141 | * remote address for the connection. So always assume those are both |
146 | * wildcarded during the search since they can never be otherwise. | 142 | * wildcarded during the search since they can never be otherwise. |
147 | */ | 143 | */ |
148 | static struct sock *inet_lookup_listener_slow(struct net *net, | ||
149 | const struct hlist_head *head, | ||
150 | const __be32 daddr, | ||
151 | const unsigned short hnum, | ||
152 | const int dif) | ||
153 | { | ||
154 | struct sock *result = NULL, *sk; | ||
155 | const struct hlist_node *node; | ||
156 | int hiscore = -1; | ||
157 | |||
158 | sk_for_each(sk, node, head) { | ||
159 | const struct inet_sock *inet = inet_sk(sk); | ||
160 | |||
161 | if (net_eq(sock_net(sk), net) && inet->num == hnum && | ||
162 | !ipv6_only_sock(sk)) { | ||
163 | const __be32 rcv_saddr = inet->rcv_saddr; | ||
164 | int score = sk->sk_family == PF_INET ? 1 : 0; | ||
165 | |||
166 | if (rcv_saddr) { | ||
167 | if (rcv_saddr != daddr) | ||
168 | continue; | ||
169 | score += 2; | ||
170 | } | ||
171 | if (sk->sk_bound_dev_if) { | ||
172 | if (sk->sk_bound_dev_if != dif) | ||
173 | continue; | ||
174 | score += 2; | ||
175 | } | ||
176 | if (score == 5) | ||
177 | return sk; | ||
178 | if (score > hiscore) { | ||
179 | hiscore = score; | ||
180 | result = sk; | ||
181 | } | ||
182 | } | ||
183 | } | ||
184 | return result; | ||
185 | } | ||
186 | 144 | ||
187 | /* Optimize the common listener case. */ | 145 | |
188 | struct sock *__inet_lookup_listener(struct net *net, | 146 | struct sock *__inet_lookup_listener(struct net *net, |
189 | struct inet_hashinfo *hashinfo, | 147 | struct inet_hashinfo *hashinfo, |
190 | const __be32 daddr, const unsigned short hnum, | 148 | const __be32 daddr, const unsigned short hnum, |
191 | const int dif) | 149 | const int dif) |
192 | { | 150 | { |
193 | struct sock *sk = NULL; | 151 | struct sock *sk, *result; |
194 | const struct hlist_head *head; | 152 | struct hlist_nulls_node *node; |
195 | 153 | unsigned int hash = inet_lhashfn(net, hnum); | |
196 | read_lock(&hashinfo->lhash_lock); | 154 | struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; |
197 | head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; | 155 | int score, hiscore; |
198 | if (!hlist_empty(head)) { | 156 | |
199 | const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); | 157 | rcu_read_lock(); |
200 | 158 | begin: | |
201 | if (inet->num == hnum && !sk->sk_node.next && | 159 | result = NULL; |
202 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && | 160 | hiscore = -1; |
203 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && | 161 | sk_nulls_for_each_rcu(sk, node, &ilb->head) { |
204 | !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) | 162 | score = compute_score(sk, net, hnum, daddr, dif); |
205 | goto sherry_cache; | 163 | if (score > hiscore) { |
206 | sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); | 164 | result = sk; |
165 | hiscore = score; | ||
166 | } | ||
207 | } | 167 | } |
208 | if (sk) { | 168 | /* |
209 | sherry_cache: | 169 | * if the nulls value we got at the end of this lookup is |
210 | sock_hold(sk); | 170 | * not the expected one, we must restart lookup. |
171 | * We probably met an item that was moved to another chain. | ||
172 | */ | ||
173 | if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) | ||
174 | goto begin; | ||
175 | if (result) { | ||
176 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) | ||
177 | result = NULL; | ||
178 | else if (unlikely(compute_score(result, net, hnum, daddr, | ||
179 | dif) < hiscore)) { | ||
180 | sock_put(result); | ||
181 | goto begin; | ||
182 | } | ||
211 | } | 183 | } |
212 | read_unlock(&hashinfo->lhash_lock); | 184 | rcu_read_unlock(); |
213 | return sk; | 185 | return result; |
214 | } | 186 | } |
215 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); | 187 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); |
216 | 188 | ||
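
compute_score() replaces both the removed slow path and the old "sherry_cache" fast path with a single scoring pass: PF_INET contributes 1, an exact local-address match 2, an exact bound-device match 2, and any hard mismatch short-circuits to -1, so a fully-bound IPv4 listener tops out at 5. A userspace model with the netns and port checks elided (illustrative only, not kernel code):

    #include <assert.h>
    #include <stdbool.h>

    struct listener { bool ipv4; unsigned addr; int dif; };

    static int score(struct listener l, unsigned daddr, int dif)
    {
    	int s = l.ipv4 ? 1 : 0;

    	if (l.addr) {
    		if (l.addr != daddr)
    			return -1;	/* bound to a different address */
    		s += 2;
    	}
    	if (l.dif) {
    		if (l.dif != dif)
    			return -1;	/* bound to a different device */
    		s += 2;
    	}
    	return s;
    }

    int main(void)
    {
    	assert(score((struct listener){ true, 0, 0 }, 1, 2) == 1);  /* wildcard */
    	assert(score((struct listener){ true, 1, 2 }, 1, 2) == 5);  /* fully bound */
    	assert(score((struct listener){ true, 3, 0 }, 1, 2) == -1); /* addr mismatch */
    	return 0;
    }
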
@@ -223,35 +195,65 @@ struct sock * __inet_lookup_established(struct net *net, | |||
223 | INET_ADDR_COOKIE(acookie, saddr, daddr) | 195 | INET_ADDR_COOKIE(acookie, saddr, daddr) |
224 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); | 196 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); |
225 | struct sock *sk; | 197 | struct sock *sk; |
226 | const struct hlist_node *node; | 198 | const struct hlist_nulls_node *node; |
227 | /* Optimize here for direct hit, only listening connections can | 199 | /* Optimize here for direct hit, only listening connections can |
228 | * have wildcards anyways. | 200 | * have wildcards anyways. |
229 | */ | 201 | */ |
230 | unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); | 202 | unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); |
231 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); | 203 | unsigned int slot = hash & (hashinfo->ehash_size - 1); |
232 | rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); | 204 | struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; |
233 | 205 | ||
234 | prefetch(head->chain.first); | 206 | rcu_read_lock(); |
235 | read_lock(lock); | 207 | begin: |
236 | sk_for_each(sk, node, &head->chain) { | 208 | sk_nulls_for_each_rcu(sk, node, &head->chain) { |
237 | if (INET_MATCH(sk, net, hash, acookie, | 209 | if (INET_MATCH(sk, net, hash, acookie, |
238 | saddr, daddr, ports, dif)) | 210 | saddr, daddr, ports, dif)) { |
239 | goto hit; /* You sunk my battleship! */ | 211 | if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) |
212 | goto begintw; | ||
213 | if (unlikely(!INET_MATCH(sk, net, hash, acookie, | ||
214 | saddr, daddr, ports, dif))) { | ||
215 | sock_put(sk); | ||
216 | goto begin; | ||
217 | } | ||
218 | goto out; | ||
219 | } | ||
240 | } | 220 | } |
221 | /* | ||
222 | * if the nulls value we got at the end of this lookup is | ||
223 | * not the expected one, we must restart lookup. | ||
224 | * We probably met an item that was moved to another chain. | ||
225 | */ | ||
226 | if (get_nulls_value(node) != slot) | ||
227 | goto begin; | ||
241 | 228 | ||
229 | begintw: | ||
242 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | 230 | /* Must check for a TIME_WAIT'er before going to listener hash. */ |
243 | sk_for_each(sk, node, &head->twchain) { | 231 | sk_nulls_for_each_rcu(sk, node, &head->twchain) { |
244 | if (INET_TW_MATCH(sk, net, hash, acookie, | 232 | if (INET_TW_MATCH(sk, net, hash, acookie, |
245 | saddr, daddr, ports, dif)) | 233 | saddr, daddr, ports, dif)) { |
246 | goto hit; | 234 | if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { |
235 | sk = NULL; | ||
236 | goto out; | ||
237 | } | ||
238 | if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, | ||
239 | saddr, daddr, ports, dif))) { | ||
240 | sock_put(sk); | ||
241 | goto begintw; | ||
242 | } | ||
243 | goto out; | ||
244 | } | ||
247 | } | 245 | } |
246 | /* | ||
247 | * if the nulls value we got at the end of this lookup is | ||
248 | * not the expected one, we must restart lookup. | ||
249 | * We probably met an item that was moved to another chain. | ||
250 | */ | ||
251 | if (get_nulls_value(node) != slot) | ||
252 | goto begintw; | ||
248 | sk = NULL; | 253 | sk = NULL; |
249 | out: | 254 | out: |
250 | read_unlock(lock); | 255 | rcu_read_unlock(); |
251 | return sk; | 256 | return sk; |
252 | hit: | ||
253 | sock_hold(sk); | ||
254 | goto out; | ||
255 | } | 257 | } |
256 | EXPORT_SYMBOL_GPL(__inet_lookup_established); | 258 | EXPORT_SYMBOL_GPL(__inet_lookup_established); |
257 | 259 | ||
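
Taken together, the two RCU loops above follow one lockless-lookup discipline: find a candidate under rcu_read_lock(), pin it with atomic_inc_not_zero(), re-validate the match after pinning (the socket slab is SLAB_DESTROY_BY_RCU, so a slot can be recycled mid-walk), and restart whenever the terminating nulls value shows the walk drifted onto another chain. A condensed sketch, with a hypothetical match() predicate standing in for INET_MATCH and the twchain leg omitted (caller holds rcu_read_lock()):

    struct sock *lookup(struct inet_ehash_bucket *head, unsigned int slot)
    {
    	struct sock *sk;
    	struct hlist_nulls_node *node;

    begin:
    	sk_nulls_for_each_rcu(sk, node, &head->chain) {
    		if (!match(sk))
    			continue;
    		/* pin the socket only if it is still live */
    		if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
    			continue;
    		/* re-check: the slab slot may have been recycled under us */
    		if (unlikely(!match(sk))) {
    			sock_put(sk);
    			goto begin;
    		}
    		return sk;
    	}
    	/* wrong end marker => we were spliced onto another chain: restart */
    	if (get_nulls_value(node) != slot)
    		goto begin;
    	return NULL;
    }
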
@@ -270,16 +272,15 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
270 | struct net *net = sock_net(sk); | 272 | struct net *net = sock_net(sk); |
271 | unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); | 273 | unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); |
272 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | 274 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); |
273 | rwlock_t *lock = inet_ehash_lockp(hinfo, hash); | 275 | spinlock_t *lock = inet_ehash_lockp(hinfo, hash); |
274 | struct sock *sk2; | 276 | struct sock *sk2; |
275 | const struct hlist_node *node; | 277 | const struct hlist_nulls_node *node; |
276 | struct inet_timewait_sock *tw; | 278 | struct inet_timewait_sock *tw; |
277 | 279 | ||
278 | prefetch(head->chain.first); | 280 | spin_lock(lock); |
279 | write_lock(lock); | ||
280 | 281 | ||
281 | /* Check TIME-WAIT sockets first. */ | 282 | /* Check TIME-WAIT sockets first. */ |
282 | sk_for_each(sk2, node, &head->twchain) { | 283 | sk_nulls_for_each(sk2, node, &head->twchain) { |
283 | tw = inet_twsk(sk2); | 284 | tw = inet_twsk(sk2); |
284 | 285 | ||
285 | if (INET_TW_MATCH(sk2, net, hash, acookie, | 286 | if (INET_TW_MATCH(sk2, net, hash, acookie, |
@@ -293,7 +294,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
293 | tw = NULL; | 294 | tw = NULL; |
294 | 295 | ||
295 | /* And established part... */ | 296 | /* And established part... */ |
296 | sk_for_each(sk2, node, &head->chain) { | 297 | sk_nulls_for_each(sk2, node, &head->chain) { |
297 | if (INET_MATCH(sk2, net, hash, acookie, | 298 | if (INET_MATCH(sk2, net, hash, acookie, |
298 | saddr, daddr, ports, dif)) | 299 | saddr, daddr, ports, dif)) |
299 | goto not_unique; | 300 | goto not_unique; |
@@ -306,9 +307,9 @@ unique: | |||
306 | inet->sport = htons(lport); | 307 | inet->sport = htons(lport); |
307 | sk->sk_hash = hash; | 308 | sk->sk_hash = hash; |
308 | WARN_ON(!sk_unhashed(sk)); | 309 | WARN_ON(!sk_unhashed(sk)); |
309 | __sk_add_node(sk, &head->chain); | 310 | __sk_nulls_add_node_rcu(sk, &head->chain); |
311 | spin_unlock(lock); | ||
310 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 312 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
311 | write_unlock(lock); | ||
312 | 313 | ||
313 | if (twp) { | 314 | if (twp) { |
314 | *twp = tw; | 315 | *twp = tw; |
@@ -324,7 +325,7 @@ unique: | |||
324 | return 0; | 325 | return 0; |
325 | 326 | ||
326 | not_unique: | 327 | not_unique: |
327 | write_unlock(lock); | 328 | spin_unlock(lock); |
328 | return -EADDRNOTAVAIL; | 329 | return -EADDRNOTAVAIL; |
329 | } | 330 | } |
330 | 331 | ||
@@ -338,8 +339,8 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) | |||
338 | void __inet_hash_nolisten(struct sock *sk) | 339 | void __inet_hash_nolisten(struct sock *sk) |
339 | { | 340 | { |
340 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 341 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
341 | struct hlist_head *list; | 342 | struct hlist_nulls_head *list; |
342 | rwlock_t *lock; | 343 | spinlock_t *lock; |
343 | struct inet_ehash_bucket *head; | 344 | struct inet_ehash_bucket *head; |
344 | 345 | ||
345 | WARN_ON(!sk_unhashed(sk)); | 346 | WARN_ON(!sk_unhashed(sk)); |
@@ -349,18 +350,17 @@ void __inet_hash_nolisten(struct sock *sk) | |||
349 | list = &head->chain; | 350 | list = &head->chain; |
350 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); | 351 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
351 | 352 | ||
352 | write_lock(lock); | 353 | spin_lock(lock); |
353 | __sk_add_node(sk, list); | 354 | __sk_nulls_add_node_rcu(sk, list); |
355 | spin_unlock(lock); | ||
354 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 356 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
355 | write_unlock(lock); | ||
356 | } | 357 | } |
357 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); | 358 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); |
358 | 359 | ||
359 | static void __inet_hash(struct sock *sk) | 360 | static void __inet_hash(struct sock *sk) |
360 | { | 361 | { |
361 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 362 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
362 | struct hlist_head *list; | 363 | struct inet_listen_hashbucket *ilb; |
363 | rwlock_t *lock; | ||
364 | 364 | ||
365 | if (sk->sk_state != TCP_LISTEN) { | 365 | if (sk->sk_state != TCP_LISTEN) { |
366 | __inet_hash_nolisten(sk); | 366 | __inet_hash_nolisten(sk); |
@@ -368,14 +368,12 @@ static void __inet_hash(struct sock *sk) | |||
368 | } | 368 | } |
369 | 369 | ||
370 | WARN_ON(!sk_unhashed(sk)); | 370 | WARN_ON(!sk_unhashed(sk)); |
371 | list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; | 371 | ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; |
372 | lock = &hashinfo->lhash_lock; | ||
373 | 372 | ||
374 | inet_listen_wlock(hashinfo); | 373 | spin_lock(&ilb->lock); |
375 | __sk_add_node(sk, list); | 374 | __sk_nulls_add_node_rcu(sk, &ilb->head); |
376 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 375 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
377 | write_unlock(lock); | 376 | spin_unlock(&ilb->lock); |
378 | wake_up(&hashinfo->lhash_wait); | ||
379 | } | 377 | } |
380 | 378 | ||
381 | void inet_hash(struct sock *sk) | 379 | void inet_hash(struct sock *sk) |
@@ -390,27 +388,23 @@ EXPORT_SYMBOL_GPL(inet_hash); | |||
390 | 388 | ||
391 | void inet_unhash(struct sock *sk) | 389 | void inet_unhash(struct sock *sk) |
392 | { | 390 | { |
393 | rwlock_t *lock; | ||
394 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 391 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
392 | spinlock_t *lock; | ||
393 | int done; | ||
395 | 394 | ||
396 | if (sk_unhashed(sk)) | 395 | if (sk_unhashed(sk)) |
397 | goto out; | 396 | return; |
398 | 397 | ||
399 | if (sk->sk_state == TCP_LISTEN) { | 398 | if (sk->sk_state == TCP_LISTEN) |
400 | local_bh_disable(); | 399 | lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; |
401 | inet_listen_wlock(hashinfo); | 400 | else |
402 | lock = &hashinfo->lhash_lock; | ||
403 | } else { | ||
404 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); | 401 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
405 | write_lock_bh(lock); | ||
406 | } | ||
407 | 402 | ||
408 | if (__sk_del_node_init(sk)) | 403 | spin_lock_bh(lock); |
404 | done =__sk_nulls_del_node_init_rcu(sk); | ||
405 | if (done) | ||
409 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 406 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
410 | write_unlock_bh(lock); | 407 | spin_unlock_bh(lock); |
411 | out: | ||
412 | if (sk->sk_state == TCP_LISTEN) | ||
413 | wake_up(&hashinfo->lhash_wait); | ||
414 | } | 408 | } |
415 | EXPORT_SYMBOL_GPL(inet_unhash); | 409 | EXPORT_SYMBOL_GPL(inet_unhash); |
416 | 410 | ||
@@ -449,7 +443,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
449 | * unique enough. | 443 | * unique enough. |
450 | */ | 444 | */ |
451 | inet_bind_bucket_for_each(tb, node, &head->chain) { | 445 | inet_bind_bucket_for_each(tb, node, &head->chain) { |
452 | if (tb->ib_net == net && tb->port == port) { | 446 | if (ib_net(tb) == net && tb->port == port) { |
453 | WARN_ON(hlist_empty(&tb->owners)); | 447 | WARN_ON(hlist_empty(&tb->owners)); |
454 | if (tb->fastreuse >= 0) | 448 | if (tb->fastreuse >= 0) |
455 | goto next_port; | 449 | goto next_port; |
@@ -524,3 +518,16 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
524 | } | 518 | } |
525 | 519 | ||
526 | EXPORT_SYMBOL_GPL(inet_hash_connect); | 520 | EXPORT_SYMBOL_GPL(inet_hash_connect); |
521 | |||
522 | void inet_hashinfo_init(struct inet_hashinfo *h) | ||
523 | { | ||
524 | int i; | ||
525 | |||
526 | for (i = 0; i < INET_LHTABLE_SIZE; i++) { | ||
527 | spin_lock_init(&h->listening_hash[i].lock); | ||
528 | INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, | ||
529 | i + LISTENING_NULLS_BASE); | ||
530 | } | ||
531 | } | ||
532 | |||
533 | EXPORT_SYMBOL_GPL(inet_hashinfo_init); | ||
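
inet_hashinfo_init() seeds every listening bucket with a distinct end marker (i + LISTENING_NULLS_BASE). That marker is what the get_nulls_value() checks earlier compare against: an RCU walker spliced onto a different chain terminates on the wrong marker and knows to restart. A userspace model of the encoding, assuming the kernel's low-bit-tagging convention for hlist_nulls (a "pointer" with bit 0 set carries (value << 1) | 1 instead of a real next pointer):

    #include <assert.h>
    #include <stdint.h>

    static inline uintptr_t make_nulls(unsigned long value)
    {
    	return (value << 1) | 1UL;		/* tagged, never a valid pointer */
    }

    static inline int is_nulls(uintptr_t ptr)		{ return ptr & 1UL; }
    static inline unsigned long nulls_value(uintptr_t ptr)	{ return ptr >> 1; }

    int main(void)
    {
    	uintptr_t end = make_nulls(7 /* bucket id + base */);

    	assert(is_nulls(end) && nulls_value(end) == 7);
    	return 0;
    }
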
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index cfd034a2b96e..6a667dae315e 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c | |||
@@ -120,7 +120,7 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) | |||
120 | iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); | 120 | iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); |
121 | 121 | ||
122 | tcph->check = 0; | 122 | tcph->check = 0; |
123 | tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0); | 123 | tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); |
124 | lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); | 124 | lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); |
125 | tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, | 125 | tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, |
126 | lro_desc->ip_tot_len - | 126 | lro_desc->ip_tot_len - |
@@ -135,7 +135,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) | |||
135 | __wsum tcp_ps_hdr_csum; | 135 | __wsum tcp_ps_hdr_csum; |
136 | 136 | ||
137 | tcp_csum = ~csum_unfold(tcph->check); | 137 | tcp_csum = ~csum_unfold(tcph->check); |
138 | tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum); | 138 | tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum); |
139 | 139 | ||
140 | tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, | 140 | tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, |
141 | len + TCP_HDR_LEN(tcph), | 141 | len + TCP_HDR_LEN(tcph), |
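
The two inet_lro hunks only drop now-unneeded casts (csum_partial() takes a const void * buffer), but the arithmetic around them is worth spelling out: lro_update_tcp_ip_header() can csum_add() the TCP header sum into the accumulated data checksum because ones'-complement sums over even-length pieces combine freely. A small userspace model of that property (illustrative only):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Model of csum_partial(): sum 16-bit big-endian words into a 32-bit
     * accumulator; carries are folded later. */
    static uint32_t partial(const uint8_t *p, size_t len, uint32_t sum)
    {
    	while (len > 1) {
    		sum += (uint32_t)p[0] << 8 | p[1];
    		p += 2;
    		len -= 2;
    	}
    	if (len)
    		sum += (uint32_t)p[0] << 8;
    	return sum;
    }

    /* Model of csum_fold(): fold carries, then invert. */
    static uint16_t fold(uint32_t sum)
    {
    	while (sum >> 16)
    		sum = (sum & 0xffff) + (sum >> 16);
    	return (uint16_t)~sum;
    }

    int main(void)
    {
    	const uint8_t hdr[4]  = { 0xde, 0xad, 0xbe, 0xef };
    	const uint8_t data[4] = { 0x01, 0x02, 0x03, 0x04 };
    	uint8_t both[8];
    	size_t i;

    	for (i = 0; i < 4; i++) { both[i] = hdr[i]; both[4 + i] = data[i]; }

    	/* Summing the pieces separately and combining equals summing the
    	 * concatenation -- the property csum_add() relies on above. */
    	assert(fold(partial(hdr, 4, partial(data, 4, 0))) ==
    	       fold(partial(both, 8, 0)));
    	return 0;
    }
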
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1c5fd38f8824..8554d0ea1719 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, | |||
20 | struct inet_bind_hashbucket *bhead; | 20 | struct inet_bind_hashbucket *bhead; |
21 | struct inet_bind_bucket *tb; | 21 | struct inet_bind_bucket *tb; |
22 | /* Unlink from established hashes. */ | 22 | /* Unlink from established hashes. */ |
23 | rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); | 23 | spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); |
24 | 24 | ||
25 | write_lock(lock); | 25 | spin_lock(lock); |
26 | if (hlist_unhashed(&tw->tw_node)) { | 26 | if (hlist_nulls_unhashed(&tw->tw_node)) { |
27 | write_unlock(lock); | 27 | spin_unlock(lock); |
28 | return; | 28 | return; |
29 | } | 29 | } |
30 | __hlist_del(&tw->tw_node); | 30 | hlist_nulls_del_rcu(&tw->tw_node); |
31 | sk_node_init(&tw->tw_node); | 31 | sk_nulls_node_init(&tw->tw_node); |
32 | write_unlock(lock); | 32 | spin_unlock(lock); |
33 | 33 | ||
34 | /* Disassociate with bind bucket. */ | 34 | /* Disassociate with bind bucket. */ |
35 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, | 35 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, |
@@ -76,7 +76,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
76 | const struct inet_sock *inet = inet_sk(sk); | 76 | const struct inet_sock *inet = inet_sk(sk); |
77 | const struct inet_connection_sock *icsk = inet_csk(sk); | 77 | const struct inet_connection_sock *icsk = inet_csk(sk); |
78 | struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); | 78 | struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); |
79 | rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); | 79 | spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
80 | struct inet_bind_hashbucket *bhead; | 80 | struct inet_bind_hashbucket *bhead; |
81 | /* Step 1: Put TW into bind hash. Original socket stays there too. | 81 | /* Step 1: Put TW into bind hash. Original socket stays there too. |
82 | Note, that any socket with inet->num != 0 MUST be bound in | 82 | Note, that any socket with inet->num != 0 MUST be bound in |
@@ -90,17 +90,21 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
90 | inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); | 90 | inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); |
91 | spin_unlock(&bhead->lock); | 91 | spin_unlock(&bhead->lock); |
92 | 92 | ||
93 | write_lock(lock); | 93 | spin_lock(lock); |
94 | 94 | ||
95 | /* Step 2: Remove SK from established hash. */ | 95 | /* |
96 | if (__sk_del_node_init(sk)) | 96 | * Step 2: Hash TW into TIMEWAIT chain. |
97 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 97 | * Should be done before removing sk from established chain |
98 | 98 | * because readers are lockless and search established first. | |
99 | /* Step 3: Hash TW into TIMEWAIT chain. */ | 99 | */ |
100 | inet_twsk_add_node(tw, &ehead->twchain); | ||
101 | atomic_inc(&tw->tw_refcnt); | 100 | atomic_inc(&tw->tw_refcnt); |
101 | inet_twsk_add_node_rcu(tw, &ehead->twchain); | ||
102 | 102 | ||
103 | write_unlock(lock); | 103 | /* Step 3: Remove SK from established hash. */ |
104 | if (__sk_nulls_del_node_init_rcu(sk)) | ||
105 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | ||
106 | |||
107 | spin_unlock(lock); | ||
104 | } | 108 | } |
105 | 109 | ||
106 | EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); | 110 | EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); |
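
The comment block added in this hunk ("Should be done before removing sk from established chain because readers are lockless and search established first") is the central ordering constraint of the whole conversion. One plausible failing interleaving if the steps kept their old delete-then-insert order, sketched as a timeline:

    /*
     * With the old order and lockless readers:
     *
     *   writer: __sk_nulls_del_node_init_rcu(sk)   sk leaves the ehash chain
     *   reader: walks head->chain                  -> no match
     *   reader: walks head->twchain                -> no match (tw not yet added)
     *   reader: concludes no socket owns the 4-tuple
     *   writer: inet_twsk_add_node_rcu(tw, ...)    too late
     *
     * Inserting tw before unlinking sk closes the window: a concurrent
     * reader sees either the original socket or its timewait replacement,
     * never neither.
     */
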
@@ -416,17 +420,17 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, | |||
416 | { | 420 | { |
417 | struct inet_timewait_sock *tw; | 421 | struct inet_timewait_sock *tw; |
418 | struct sock *sk; | 422 | struct sock *sk; |
419 | struct hlist_node *node; | 423 | struct hlist_nulls_node *node; |
420 | int h; | 424 | int h; |
421 | 425 | ||
422 | local_bh_disable(); | 426 | local_bh_disable(); |
423 | for (h = 0; h < (hashinfo->ehash_size); h++) { | 427 | for (h = 0; h < (hashinfo->ehash_size); h++) { |
424 | struct inet_ehash_bucket *head = | 428 | struct inet_ehash_bucket *head = |
425 | inet_ehash_bucket(hashinfo, h); | 429 | inet_ehash_bucket(hashinfo, h); |
426 | rwlock_t *lock = inet_ehash_lockp(hashinfo, h); | 430 | spinlock_t *lock = inet_ehash_lockp(hashinfo, h); |
427 | restart: | 431 | restart: |
428 | write_lock(lock); | 432 | spin_lock(lock); |
429 | sk_for_each(sk, node, &head->twchain) { | 433 | sk_nulls_for_each(sk, node, &head->twchain) { |
430 | 434 | ||
431 | tw = inet_twsk(sk); | 435 | tw = inet_twsk(sk); |
432 | if (!net_eq(twsk_net(tw), net) || | 436 | if (!net_eq(twsk_net(tw), net) || |
@@ -434,13 +438,13 @@ restart: | |||
434 | continue; | 438 | continue; |
435 | 439 | ||
436 | atomic_inc(&tw->tw_refcnt); | 440 | atomic_inc(&tw->tw_refcnt); |
437 | write_unlock(lock); | 441 | spin_unlock(lock); |
438 | inet_twsk_deschedule(tw, twdr); | 442 | inet_twsk_deschedule(tw, twdr); |
439 | inet_twsk_put(tw); | 443 | inet_twsk_put(tw); |
440 | 444 | ||
441 | goto restart; | 445 | goto restart; |
442 | } | 446 | } |
443 | write_unlock(lock); | 447 | spin_unlock(lock); |
444 | } | 448 | } |
445 | local_bh_enable(); | 449 | local_bh_enable(); |
446 | } | 450 | } |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index a456ceeac3f2..b1fbe18feb5a 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -144,7 +144,7 @@ static void unlink_from_unused(struct inet_peer *p) | |||
144 | * _stack is known to be NULL or not at compile time, | 144 | * _stack is known to be NULL or not at compile time, |
145 | * so compiler will optimize the if (_stack) tests. | 145 | * so compiler will optimize the if (_stack) tests. |
146 | */ | 146 | */ |
147 | #define lookup(_daddr,_stack) \ | 147 | #define lookup(_daddr, _stack) \ |
148 | ({ \ | 148 | ({ \ |
149 | struct inet_peer *u, **v; \ | 149 | struct inet_peer *u, **v; \ |
150 | if (_stack != NULL) { \ | 150 | if (_stack != NULL) { \ |
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 450016b89a18..df3fe50bbf0d 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -106,7 +106,7 @@ int ip_forward(struct sk_buff *skb) | |||
106 | * We now generate an ICMP HOST REDIRECT giving the route | 106 | * We now generate an ICMP HOST REDIRECT giving the route |
107 | * we calculated. | 107 | * we calculated. |
108 | */ | 108 | */ |
109 | if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb->sp) | 109 | if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) |
110 | ip_rt_send_redirect(skb); | 110 | ip_rt_send_redirect(skb); |
111 | 111 | ||
112 | skb->priority = rt_tos2priority(iph->tos); | 112 | skb->priority = rt_tos2priority(iph->tos); |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index e4f81f54befe..6659ac000eeb 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -56,7 +56,7 @@ struct ipfrag_skb_cb | |||
56 | int offset; | 56 | int offset; |
57 | }; | 57 | }; |
58 | 58 | ||
59 | #define FRAG_CB(skb) ((struct ipfrag_skb_cb*)((skb)->cb)) | 59 | #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) |
60 | 60 | ||
61 | /* Describe an entry in the "incomplete datagrams" queue. */ | 61 | /* Describe an entry in the "incomplete datagrams" queue. */ |
62 | struct ipq { | 62 | struct ipq { |
@@ -559,9 +559,8 @@ out_nomem: | |||
559 | goto out_fail; | 559 | goto out_fail; |
560 | out_oversize: | 560 | out_oversize: |
561 | if (net_ratelimit()) | 561 | if (net_ratelimit()) |
562 | printk(KERN_INFO | 562 | printk(KERN_INFO "Oversized IP packet from %pI4.\n", |
563 | "Oversized IP packet from " NIPQUAD_FMT ".\n", | 563 | &qp->saddr); |
564 | NIPQUAD(qp->saddr)); | ||
565 | out_fail: | 564 | out_fail: |
566 | IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS); | 565 | IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS); |
567 | return err; | 566 | return err; |
@@ -608,7 +607,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { | |||
608 | .data = &init_net.ipv4.frags.high_thresh, | 607 | .data = &init_net.ipv4.frags.high_thresh, |
609 | .maxlen = sizeof(int), | 608 | .maxlen = sizeof(int), |
610 | .mode = 0644, | 609 | .mode = 0644, |
611 | .proc_handler = &proc_dointvec | 610 | .proc_handler = proc_dointvec |
612 | }, | 611 | }, |
613 | { | 612 | { |
614 | .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, | 613 | .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, |
@@ -616,7 +615,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { | |||
616 | .data = &init_net.ipv4.frags.low_thresh, | 615 | .data = &init_net.ipv4.frags.low_thresh, |
617 | .maxlen = sizeof(int), | 616 | .maxlen = sizeof(int), |
618 | .mode = 0644, | 617 | .mode = 0644, |
619 | .proc_handler = &proc_dointvec | 618 | .proc_handler = proc_dointvec |
620 | }, | 619 | }, |
621 | { | 620 | { |
622 | .ctl_name = NET_IPV4_IPFRAG_TIME, | 621 | .ctl_name = NET_IPV4_IPFRAG_TIME, |
@@ -624,8 +623,8 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { | |||
624 | .data = &init_net.ipv4.frags.timeout, | 623 | .data = &init_net.ipv4.frags.timeout, |
625 | .maxlen = sizeof(int), | 624 | .maxlen = sizeof(int), |
626 | .mode = 0644, | 625 | .mode = 0644, |
627 | .proc_handler = &proc_dointvec_jiffies, | 626 | .proc_handler = proc_dointvec_jiffies, |
628 | .strategy = &sysctl_jiffies | 627 | .strategy = sysctl_jiffies |
629 | }, | 628 | }, |
630 | { } | 629 | { } |
631 | }; | 630 | }; |
@@ -637,15 +636,15 @@ static struct ctl_table ip4_frags_ctl_table[] = { | |||
637 | .data = &ip4_frags.secret_interval, | 636 | .data = &ip4_frags.secret_interval, |
638 | .maxlen = sizeof(int), | 637 | .maxlen = sizeof(int), |
639 | .mode = 0644, | 638 | .mode = 0644, |
640 | .proc_handler = &proc_dointvec_jiffies, | 639 | .proc_handler = proc_dointvec_jiffies, |
641 | .strategy = &sysctl_jiffies | 640 | .strategy = sysctl_jiffies |
642 | }, | 641 | }, |
643 | { | 642 | { |
644 | .procname = "ipfrag_max_dist", | 643 | .procname = "ipfrag_max_dist", |
645 | .data = &sysctl_ipfrag_max_dist, | 644 | .data = &sysctl_ipfrag_max_dist, |
646 | .maxlen = sizeof(int), | 645 | .maxlen = sizeof(int), |
647 | .mode = 0644, | 646 | .mode = 0644, |
648 | .proc_handler = &proc_dointvec_minmax, | 647 | .proc_handler = proc_dointvec_minmax, |
649 | .extra1 = &zero | 648 | .extra1 = &zero |
650 | }, | 649 | }, |
651 | { } | 650 | { } |
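
The proc_handler/strategy churn in this sysctl table is purely cosmetic: in C a function designator decays to a pointer, so &proc_dointvec and proc_dointvec denote the same value and the ampersand can go. A two-line demonstration:

    #include <assert.h>

    static int handler(int v) { return v + 1; }

    int main(void)
    {
    	/* A function name decays to a pointer: both initializers below
    	 * yield the same value, which is why the '&' above is redundant. */
    	int (*a)(int) = handler;
    	int (*b)(int) = &handler;

    	assert(a == b && a(1) == 2);
    	return 0;
    }
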
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 85c487b8572b..0101521f366b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -126,8 +126,6 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev); | |||
126 | 126 | ||
127 | /* Fallback tunnel: no source, no destination, no key, no options */ | 127 | /* Fallback tunnel: no source, no destination, no key, no options */ |
128 | 128 | ||
129 | static int ipgre_fb_tunnel_init(struct net_device *dev); | ||
130 | |||
131 | #define HASH_SIZE 16 | 129 | #define HASH_SIZE 16 |
132 | 130 | ||
133 | static int ipgre_net_id; | 131 | static int ipgre_net_id; |
@@ -371,7 +369,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
371 | by themself??? | 369 | by themself??? |
372 | */ | 370 | */ |
373 | 371 | ||
374 | struct iphdr *iph = (struct iphdr*)skb->data; | 372 | struct iphdr *iph = (struct iphdr *)skb->data; |
375 | __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); | 373 | __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); |
376 | int grehlen = (iph->ihl<<2) + 4; | 374 | int grehlen = (iph->ihl<<2) + 4; |
377 | const int type = icmp_hdr(skb)->type; | 375 | const int type = icmp_hdr(skb)->type; |
@@ -632,7 +630,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
632 | 630 | ||
633 | if (dev->header_ops && dev->type == ARPHRD_IPGRE) { | 631 | if (dev->header_ops && dev->type == ARPHRD_IPGRE) { |
634 | gre_hlen = 0; | 632 | gre_hlen = 0; |
635 | tiph = (struct iphdr*)skb->data; | 633 | tiph = (struct iphdr *)skb->data; |
636 | } else { | 634 | } else { |
637 | gre_hlen = tunnel->hlen; | 635 | gre_hlen = tunnel->hlen; |
638 | tiph = &tunnel->parms.iph; | 636 | tiph = &tunnel->parms.iph; |
@@ -660,7 +658,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
660 | if (neigh == NULL) | 658 | if (neigh == NULL) |
661 | goto tx_error; | 659 | goto tx_error; |
662 | 660 | ||
663 | addr6 = (struct in6_addr*)&neigh->primary_key; | 661 | addr6 = (struct in6_addr *)&neigh->primary_key; |
664 | addr_type = ipv6_addr_type(addr6); | 662 | addr_type = ipv6_addr_type(addr6); |
665 | 663 | ||
666 | if (addr_type == IPV6_ADDR_ANY) { | 664 | if (addr_type == IPV6_ADDR_ANY) { |
@@ -726,7 +724,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
726 | } | 724 | } |
727 | #ifdef CONFIG_IPV6 | 725 | #ifdef CONFIG_IPV6 |
728 | else if (skb->protocol == htons(ETH_P_IPV6)) { | 726 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
729 | struct rt6_info *rt6 = (struct rt6_info*)skb->dst; | 727 | struct rt6_info *rt6 = (struct rt6_info *)skb->dst; |
730 | 728 | ||
731 | if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { | 729 | if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { |
732 | if ((tunnel->parms.iph.daddr && | 730 | if ((tunnel->parms.iph.daddr && |
@@ -800,7 +798,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
800 | iph->ttl = old_iph->ttl; | 798 | iph->ttl = old_iph->ttl; |
801 | #ifdef CONFIG_IPV6 | 799 | #ifdef CONFIG_IPV6 |
802 | else if (skb->protocol == htons(ETH_P_IPV6)) | 800 | else if (skb->protocol == htons(ETH_P_IPV6)) |
803 | iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; | 801 | iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; |
804 | #endif | 802 | #endif |
805 | else | 803 | else |
806 | iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); | 804 | iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); |
@@ -962,7 +960,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
962 | break; | 960 | break; |
963 | } | 961 | } |
964 | } else { | 962 | } else { |
965 | unsigned nflags=0; | 963 | unsigned nflags = 0; |
966 | 964 | ||
967 | t = netdev_priv(dev); | 965 | t = netdev_priv(dev); |
968 | 966 | ||
@@ -1104,7 +1102,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, | |||
1104 | 1102 | ||
1105 | static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) | 1103 | static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) |
1106 | { | 1104 | { |
1107 | struct iphdr *iph = (struct iphdr*) skb_mac_header(skb); | 1105 | struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); |
1108 | memcpy(haddr, &iph->saddr, 4); | 1106 | memcpy(haddr, &iph->saddr, 4); |
1109 | return 4; | 1107 | return 4; |
1110 | } | 1108 | } |
@@ -1142,6 +1140,7 @@ static int ipgre_open(struct net_device *dev) | |||
1142 | static int ipgre_close(struct net_device *dev) | 1140 | static int ipgre_close(struct net_device *dev) |
1143 | { | 1141 | { |
1144 | struct ip_tunnel *t = netdev_priv(dev); | 1142 | struct ip_tunnel *t = netdev_priv(dev); |
1143 | |||
1145 | if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { | 1144 | if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { |
1146 | struct in_device *in_dev; | 1145 | struct in_device *in_dev; |
1147 | in_dev = inetdev_by_index(dev_net(dev), t->mlink); | 1146 | in_dev = inetdev_by_index(dev_net(dev), t->mlink); |
@@ -1155,14 +1154,22 @@ static int ipgre_close(struct net_device *dev) | |||
1155 | 1154 | ||
1156 | #endif | 1155 | #endif |
1157 | 1156 | ||
1157 | static const struct net_device_ops ipgre_netdev_ops = { | ||
1158 | .ndo_init = ipgre_tunnel_init, | ||
1159 | .ndo_uninit = ipgre_tunnel_uninit, | ||
1160 | #ifdef CONFIG_NET_IPGRE_BROADCAST | ||
1161 | .ndo_open = ipgre_open, | ||
1162 | .ndo_stop = ipgre_close, | ||
1163 | #endif | ||
1164 | .ndo_start_xmit = ipgre_tunnel_xmit, | ||
1165 | .ndo_do_ioctl = ipgre_tunnel_ioctl, | ||
1166 | .ndo_change_mtu = ipgre_tunnel_change_mtu, | ||
1167 | }; | ||
1168 | |||
1158 | static void ipgre_tunnel_setup(struct net_device *dev) | 1169 | static void ipgre_tunnel_setup(struct net_device *dev) |
1159 | { | 1170 | { |
1160 | dev->init = ipgre_tunnel_init; | 1171 | dev->netdev_ops = &ipgre_netdev_ops; |
1161 | dev->uninit = ipgre_tunnel_uninit; | ||
1162 | dev->destructor = free_netdev; | 1172 | dev->destructor = free_netdev; |
1163 | dev->hard_start_xmit = ipgre_tunnel_xmit; | ||
1164 | dev->do_ioctl = ipgre_tunnel_ioctl; | ||
1165 | dev->change_mtu = ipgre_tunnel_change_mtu; | ||
1166 | 1173 | ||
1167 | dev->type = ARPHRD_IPGRE; | 1174 | dev->type = ARPHRD_IPGRE; |
1168 | dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; | 1175 | dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; |
@@ -1194,8 +1201,6 @@ static int ipgre_tunnel_init(struct net_device *dev) | |||
1194 | return -EINVAL; | 1201 | return -EINVAL; |
1195 | dev->flags = IFF_BROADCAST; | 1202 | dev->flags = IFF_BROADCAST; |
1196 | dev->header_ops = &ipgre_header_ops; | 1203 | dev->header_ops = &ipgre_header_ops; |
1197 | dev->open = ipgre_open; | ||
1198 | dev->stop = ipgre_close; | ||
1199 | } | 1204 | } |
1200 | #endif | 1205 | #endif |
1201 | } else | 1206 | } else |
@@ -1204,7 +1209,7 @@ static int ipgre_tunnel_init(struct net_device *dev) | |||
1204 | return 0; | 1209 | return 0; |
1205 | } | 1210 | } |
1206 | 1211 | ||
1207 | static int ipgre_fb_tunnel_init(struct net_device *dev) | 1212 | static void ipgre_fb_tunnel_init(struct net_device *dev) |
1208 | { | 1213 | { |
1209 | struct ip_tunnel *tunnel = netdev_priv(dev); | 1214 | struct ip_tunnel *tunnel = netdev_priv(dev); |
1210 | struct iphdr *iph = &tunnel->parms.iph; | 1215 | struct iphdr *iph = &tunnel->parms.iph; |
@@ -1220,7 +1225,6 @@ static int ipgre_fb_tunnel_init(struct net_device *dev) | |||
1220 | 1225 | ||
1221 | dev_hold(dev); | 1226 | dev_hold(dev); |
1222 | ign->tunnels_wc[0] = tunnel; | 1227 | ign->tunnels_wc[0] = tunnel; |
1223 | return 0; | ||
1224 | } | 1228 | } |
1225 | 1229 | ||
1226 | 1230 | ||
@@ -1264,9 +1268,9 @@ static int ipgre_init_net(struct net *net) | |||
1264 | err = -ENOMEM; | 1268 | err = -ENOMEM; |
1265 | goto err_alloc_dev; | 1269 | goto err_alloc_dev; |
1266 | } | 1270 | } |
1267 | |||
1268 | ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init; | ||
1269 | dev_net_set(ign->fb_tunnel_dev, net); | 1271 | dev_net_set(ign->fb_tunnel_dev, net); |
1272 | |||
1273 | ipgre_fb_tunnel_init(ign->fb_tunnel_dev); | ||
1270 | ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; | 1274 | ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; |
1271 | 1275 | ||
1272 | if ((err = register_netdev(ign->fb_tunnel_dev))) | 1276 | if ((err = register_netdev(ign->fb_tunnel_dev))) |
@@ -1397,16 +1401,22 @@ static int ipgre_tap_init(struct net_device *dev) | |||
1397 | return 0; | 1401 | return 0; |
1398 | } | 1402 | } |
1399 | 1403 | ||
1404 | static const struct net_device_ops ipgre_tap_netdev_ops = { | ||
1405 | .ndo_init = ipgre_tap_init, | ||
1406 | .ndo_uninit = ipgre_tunnel_uninit, | ||
1407 | .ndo_start_xmit = ipgre_tunnel_xmit, | ||
1408 | .ndo_set_mac_address = eth_mac_addr, | ||
1409 | .ndo_validate_addr = eth_validate_addr, | ||
1410 | .ndo_change_mtu = ipgre_tunnel_change_mtu, | ||
1411 | }; | ||
1412 | |||
1400 | static void ipgre_tap_setup(struct net_device *dev) | 1413 | static void ipgre_tap_setup(struct net_device *dev) |
1401 | { | 1414 | { |
1402 | 1415 | ||
1403 | ether_setup(dev); | 1416 | ether_setup(dev); |
1404 | 1417 | ||
1405 | dev->init = ipgre_tap_init; | 1418 | dev->netdev_ops = &ipgre_netdev_ops; |
1406 | dev->uninit = ipgre_tunnel_uninit; | ||
1407 | dev->destructor = free_netdev; | 1419 | dev->destructor = free_netdev; |
1408 | dev->hard_start_xmit = ipgre_tunnel_xmit; | ||
1409 | dev->change_mtu = ipgre_tunnel_change_mtu; | ||
1410 | 1420 | ||
1411 | dev->iflink = 0; | 1421 | dev->iflink = 0; |
1412 | dev->features |= NETIF_F_NETNS_LOCAL; | 1422 | dev->features |= NETIF_F_NETNS_LOCAL; |
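
Most of the ip_gre portion is the net_device_ops conversion: the per-device callback fields (dev->init, dev->hard_start_xmit, dev->do_ioctl, ...) move into a single const ops table shared by every tunnel device, so each netdev carries one read-only pointer instead of its own set of function pointers. A minimal sketch of the pattern with hypothetical foo_* names (not from this patch; ndo_start_xmit still returned int in this era):

    static int foo_init(struct net_device *dev)
    {
    	return 0;
    }

    static int foo_xmit(struct sk_buff *skb, struct net_device *dev)
    {
    	dev_kfree_skb(skb);		/* stub: drop everything */
    	return 0;			/* NETDEV_TX_OK */
    }

    static const struct net_device_ops foo_netdev_ops = {
    	.ndo_init	= foo_init,
    	.ndo_start_xmit	= foo_xmit,
    };

    static void foo_setup(struct net_device *dev)
    {
    	/* one shared, read-only ops table replaces per-device callbacks */
    	dev->netdev_ops = &foo_netdev_ops;
    	dev->destructor = free_netdev;
    }
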
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index cfb38ac9d698..1a58a6fa1dc0 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -302,10 +302,8 @@ static inline int ip_rcv_options(struct sk_buff *skb) | |||
302 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { | 302 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { |
303 | if (IN_DEV_LOG_MARTIANS(in_dev) && | 303 | if (IN_DEV_LOG_MARTIANS(in_dev) && |
304 | net_ratelimit()) | 304 | net_ratelimit()) |
305 | printk(KERN_INFO "source route option " | 305 | printk(KERN_INFO "source route option %pI4 -> %pI4\n", |
306 | NIPQUAD_FMT " -> " NIPQUAD_FMT "\n", | 306 | &iph->saddr, &iph->daddr); |
307 | NIPQUAD(iph->saddr), | ||
308 | NIPQUAD(iph->daddr)); | ||
309 | in_dev_put(in_dev); | 307 | in_dev_put(in_dev); |
310 | goto drop; | 308 | goto drop; |
311 | } | 309 | } |
@@ -350,9 +348,9 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
350 | struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); | 348 | struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); |
351 | u32 idx = skb->dst->tclassid; | 349 | u32 idx = skb->dst->tclassid; |
352 | st[idx&0xFF].o_packets++; | 350 | st[idx&0xFF].o_packets++; |
353 | st[idx&0xFF].o_bytes+=skb->len; | 351 | st[idx&0xFF].o_bytes += skb->len; |
354 | st[(idx>>16)&0xFF].i_packets++; | 352 | st[(idx>>16)&0xFF].i_packets++; |
355 | st[(idx>>16)&0xFF].i_bytes+=skb->len; | 353 | st[(idx>>16)&0xFF].i_bytes += skb->len; |
356 | } | 354 | } |
357 | #endif | 355 | #endif |
358 | 356 | ||
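
Same conversion as in af_inet.c and ip_fragment.c earlier in the series: the NIPQUAD macro pair gives way to the %pI4 printk extension, which consumes a pointer to the big-endian 32-bit address rather than four unpacked bytes. For illustration (a fragment, kernel context assumed):

    __be32 saddr = htonl(0xc0a80001);			/* 192.168.0.1 */

    /* note: the argument is &saddr, not saddr */
    printk(KERN_INFO "source route option %pI4\n", &saddr);
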
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d2a8f8bb78a6..8ebe86dd72af 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -430,7 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) | |||
430 | * single device frame, and queue such a frame for sending. | 430 | * single device frame, and queue such a frame for sending. |
431 | */ | 431 | */ |
432 | 432 | ||
433 | int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | 433 | int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) |
434 | { | 434 | { |
435 | struct iphdr *iph; | 435 | struct iphdr *iph; |
436 | int raw = 0; | 436 | int raw = 0; |
@@ -720,7 +720,7 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
720 | int getfrag(void *from, char *to, int offset, int len, | 720 | int getfrag(void *from, char *to, int offset, int len, |
721 | int odd, struct sk_buff *skb), | 721 | int odd, struct sk_buff *skb), |
722 | void *from, int length, int hh_len, int fragheaderlen, | 722 | void *from, int length, int hh_len, int fragheaderlen, |
723 | int transhdrlen, int mtu,unsigned int flags) | 723 | int transhdrlen, int mtu, unsigned int flags) |
724 | { | 724 | { |
725 | struct sk_buff *skb; | 725 | struct sk_buff *skb; |
726 | int err; | 726 | int err; |
@@ -741,7 +741,7 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
741 | skb_reserve(skb, hh_len); | 741 | skb_reserve(skb, hh_len); |
742 | 742 | ||
743 | /* create space for UDP/IP header */ | 743 | /* create space for UDP/IP header */ |
744 | skb_put(skb,fragheaderlen + transhdrlen); | 744 | skb_put(skb, fragheaderlen + transhdrlen); |
745 | 745 | ||
746 | /* initialize network header pointer */ | 746 | /* initialize network header pointer */ |
747 | skb_reset_network_header(skb); | 747 | skb_reset_network_header(skb); |
@@ -778,7 +778,7 @@ int ip_append_data(struct sock *sk, | |||
778 | int getfrag(void *from, char *to, int offset, int len, | 778 | int getfrag(void *from, char *to, int offset, int len, |
779 | int odd, struct sk_buff *skb), | 779 | int odd, struct sk_buff *skb), |
780 | void *from, int length, int transhdrlen, | 780 | void *from, int length, int transhdrlen, |
781 | struct ipcm_cookie *ipc, struct rtable *rt, | 781 | struct ipcm_cookie *ipc, struct rtable **rtp, |
782 | unsigned int flags) | 782 | unsigned int flags) |
783 | { | 783 | { |
784 | struct inet_sock *inet = inet_sk(sk); | 784 | struct inet_sock *inet = inet_sk(sk); |
@@ -793,6 +793,7 @@ int ip_append_data(struct sock *sk, | |||
793 | int offset = 0; | 793 | int offset = 0; |
794 | unsigned int maxfraglen, fragheaderlen; | 794 | unsigned int maxfraglen, fragheaderlen; |
795 | int csummode = CHECKSUM_NONE; | 795 | int csummode = CHECKSUM_NONE; |
796 | struct rtable *rt; | ||
796 | 797 | ||
797 | if (flags&MSG_PROBE) | 798 | if (flags&MSG_PROBE) |
798 | return 0; | 799 | return 0; |
@@ -812,7 +813,11 @@ int ip_append_data(struct sock *sk, | |||
812 | inet->cork.flags |= IPCORK_OPT; | 813 | inet->cork.flags |= IPCORK_OPT; |
813 | inet->cork.addr = ipc->addr; | 814 | inet->cork.addr = ipc->addr; |
814 | } | 815 | } |
815 | dst_hold(&rt->u.dst); | 816 | rt = *rtp; |
817 | /* | ||
818 | * We steal reference to this route, caller should not release it | ||
819 | */ | ||
820 | *rtp = NULL; | ||
816 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? | 821 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? |
817 | rt->u.dst.dev->mtu : | 822 | rt->u.dst.dev->mtu : |
818 | dst_mtu(rt->u.dst.path); | 823 | dst_mtu(rt->u.dst.path); |
@@ -1279,7 +1284,12 @@ int ip_push_pending_frames(struct sock *sk) | |||
1279 | 1284 | ||
1280 | skb->priority = sk->sk_priority; | 1285 | skb->priority = sk->sk_priority; |
1281 | skb->mark = sk->sk_mark; | 1286 | skb->mark = sk->sk_mark; |
1282 | skb->dst = dst_clone(&rt->u.dst); | 1287 | /* |
1288 | * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec | ||
1289 | * on dst refcount | ||
1290 | */ | ||
1291 | inet->cork.dst = NULL; | ||
1292 | skb->dst = &rt->u.dst; | ||
1283 | 1293 | ||
1284 | if (iph->protocol == IPPROTO_ICMP) | 1294 | if (iph->protocol == IPPROTO_ICMP) |
1285 | icmp_out_count(net, ((struct icmphdr *) | 1295 | icmp_out_count(net, ((struct icmphdr *) |
@@ -1391,7 +1401,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1391 | sk->sk_protocol = ip_hdr(skb)->protocol; | 1401 | sk->sk_protocol = ip_hdr(skb)->protocol; |
1392 | sk->sk_bound_dev_if = arg->bound_dev_if; | 1402 | sk->sk_bound_dev_if = arg->bound_dev_if; |
1393 | ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, | 1403 | ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, |
1394 | &ipc, rt, MSG_DONTWAIT); | 1404 | &ipc, &rt, MSG_DONTWAIT); |
1395 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { | 1405 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { |
1396 | if (arg->csumoffset >= 0) | 1406 | if (arg->csumoffset >= 0) |
1397 | *((__sum16 *)skb_transport_header(skb) + | 1407 | *((__sum16 *)skb_transport_header(skb) + |
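
Two halves of one refcounting optimization in ip_output.c: ip_append_data() now takes struct rtable **rtp and steals the caller's route reference (clearing *rtp when it does), and ip_push_pending_frames() moves cork.dst straight into skb->dst instead of dst_clone(), saving an atomic inc/dec pair per packet. A caller-side sketch of the donated-reference contract (net, fl, sk, getfrag, from, len, ipc and flags are assumed context; error paths elided):

    struct rtable *rt = NULL;

    if (ip_route_output_flow(net, &rt, &fl, sk, 0))
    	return -EHOSTUNREACH;			/* rt holds one reference */

    err = ip_append_data(sk, getfrag, from, len, 0, &ipc, &rt, flags);

    /* On the paths where ip_append_data() consumed the reference, rt is
     * now NULL; ip_rt_put() is a no-op on NULL, so this stays balanced. */
    ip_rt_put(rt);
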
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 465abf0a9869..43c05854d752 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #define IP_CMSG_RECVOPTS 8 | 48 | #define IP_CMSG_RECVOPTS 8 |
49 | #define IP_CMSG_RETOPTS 16 | 49 | #define IP_CMSG_RETOPTS 16 |
50 | #define IP_CMSG_PASSSEC 32 | 50 | #define IP_CMSG_PASSSEC 32 |
51 | #define IP_CMSG_ORIGDSTADDR 64 | ||
51 | 52 | ||
52 | /* | 53 | /* |
53 | * SOL_IP control messages. | 54 | * SOL_IP control messages. |
@@ -94,7 +95,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) | |||
94 | static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) | 95 | static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) |
95 | { | 96 | { |
96 | unsigned char optbuf[sizeof(struct ip_options) + 40]; | 97 | unsigned char optbuf[sizeof(struct ip_options) + 40]; |
97 | struct ip_options * opt = (struct ip_options*)optbuf; | 98 | struct ip_options * opt = (struct ip_options *)optbuf; |
98 | 99 | ||
99 | if (IPCB(skb)->opt.optlen == 0) | 100 | if (IPCB(skb)->opt.optlen == 0) |
100 | return; | 101 | return; |
@@ -126,6 +127,27 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) | |||
126 | security_release_secctx(secdata, seclen); | 127 | security_release_secctx(secdata, seclen); |
127 | } | 128 | } |
128 | 129 | ||
130 | static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) | ||
131 | { | ||
132 | struct sockaddr_in sin; | ||
133 | struct iphdr *iph = ip_hdr(skb); | ||
134 | __be16 *ports = (__be16 *)skb_transport_header(skb); | ||
135 | |||
136 | if (skb_transport_offset(skb) + 4 > skb->len) | ||
137 | return; | ||
138 | |||
139 | /* All current transport protocols have the port numbers in the | ||
140 | * first four bytes of the transport header and this function is | ||
141 | * written with this assumption in mind. | ||
142 | */ | ||
143 | |||
144 | sin.sin_family = AF_INET; | ||
145 | sin.sin_addr.s_addr = iph->daddr; | ||
146 | sin.sin_port = ports[1]; | ||
147 | memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); | ||
148 | |||
149 | put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); | ||
150 | } | ||
129 | 151 | ||
130 | void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) | 152 | void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) |
131 | { | 153 | { |
@@ -160,6 +182,12 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) | |||
160 | 182 | ||
161 | if (flags & 1) | 183 | if (flags & 1) |
162 | ip_cmsg_recv_security(msg, skb); | 184 | ip_cmsg_recv_security(msg, skb); |
185 | |||
186 | if ((flags>>=1) == 0) | ||
187 | return; | ||
188 | if (flags & 1) | ||
189 | ip_cmsg_recv_dstaddr(msg, skb); | ||
190 | |||
163 | } | 191 | } |
164 | 192 | ||
165 | int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) | 193 | int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) |
@@ -411,7 +439,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
411 | int optname, char __user *optval, int optlen) | 439 | int optname, char __user *optval, int optlen) |
412 | { | 440 | { |
413 | struct inet_sock *inet = inet_sk(sk); | 441 | struct inet_sock *inet = inet_sk(sk); |
414 | int val=0,err; | 442 | int val = 0, err; |
415 | 443 | ||
416 | if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) | | 444 | if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) | |
417 | (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) | | 445 | (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) | |
@@ -421,7 +449,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
421 | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | | 449 | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | |
422 | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || | 450 | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || |
423 | optname == IP_MULTICAST_TTL || | 451 | optname == IP_MULTICAST_TTL || |
424 | optname == IP_MULTICAST_LOOP) { | 452 | optname == IP_MULTICAST_LOOP || |
453 | optname == IP_RECVORIGDSTADDR) { | ||
425 | if (optlen >= sizeof(int)) { | 454 | if (optlen >= sizeof(int)) { |
426 | if (get_user(val, (int __user *) optval)) | 455 | if (get_user(val, (int __user *) optval)) |
427 | return -EFAULT; | 456 | return -EFAULT; |
@@ -437,7 +466,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
437 | /* If optlen==0, it is equivalent to val == 0 */ | 466 | /* If optlen==0, it is equivalent to val == 0 */ |
438 | 467 | ||
439 | if (ip_mroute_opt(optname)) | 468 | if (ip_mroute_opt(optname)) |
440 | return ip_mroute_setsockopt(sk,optname,optval,optlen); | 469 | return ip_mroute_setsockopt(sk, optname, optval, optlen); |
441 | 470 | ||
442 | err = 0; | 471 | err = 0; |
443 | lock_sock(sk); | 472 | lock_sock(sk); |
@@ -509,6 +538,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
509 | else | 538 | else |
510 | inet->cmsg_flags &= ~IP_CMSG_PASSSEC; | 539 | inet->cmsg_flags &= ~IP_CMSG_PASSSEC; |
511 | break; | 540 | break; |
541 | case IP_RECVORIGDSTADDR: | ||
542 | if (val) | ||
543 | inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR; | ||
544 | else | ||
545 | inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; | ||
546 | break; | ||
512 | case IP_TOS: /* This sets both TOS and Precedence */ | 547 | case IP_TOS: /* This sets both TOS and Precedence */ |
513 | if (sk->sk_type == SOCK_STREAM) { | 548 | if (sk->sk_type == SOCK_STREAM) { |
514 | val &= ~3; | 549 | val &= ~3; |
@@ -549,7 +584,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
549 | goto e_inval; | 584 | goto e_inval; |
550 | if (optlen<1) | 585 | if (optlen<1) |
551 | goto e_inval; | 586 | goto e_inval; |
552 | if (val==-1) | 587 | if (val == -1) |
553 | val = 1; | 588 | val = 1; |
554 | if (val < 0 || val > 255) | 589 | if (val < 0 || val > 255) |
555 | goto e_inval; | 590 | goto e_inval; |
@@ -573,12 +608,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
573 | 608 | ||
574 | err = -EFAULT; | 609 | err = -EFAULT; |
575 | if (optlen >= sizeof(struct ip_mreqn)) { | 610 | if (optlen >= sizeof(struct ip_mreqn)) { |
576 | if (copy_from_user(&mreq,optval,sizeof(mreq))) | 611 | if (copy_from_user(&mreq, optval, sizeof(mreq))) |
577 | break; | 612 | break; |
578 | } else { | 613 | } else { |
579 | memset(&mreq, 0, sizeof(mreq)); | 614 | memset(&mreq, 0, sizeof(mreq)); |
580 | if (optlen >= sizeof(struct in_addr) && | 615 | if (optlen >= sizeof(struct in_addr) && |
581 | copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr))) | 616 | copy_from_user(&mreq.imr_address, optval, sizeof(struct in_addr))) |
582 | break; | 617 | break; |
583 | } | 618 | } |
584 | 619 | ||
@@ -626,11 +661,11 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
626 | goto e_inval; | 661 | goto e_inval; |
627 | err = -EFAULT; | 662 | err = -EFAULT; |
628 | if (optlen >= sizeof(struct ip_mreqn)) { | 663 | if (optlen >= sizeof(struct ip_mreqn)) { |
629 | if (copy_from_user(&mreq,optval,sizeof(mreq))) | 664 | if (copy_from_user(&mreq, optval, sizeof(mreq))) |
630 | break; | 665 | break; |
631 | } else { | 666 | } else { |
632 | memset(&mreq, 0, sizeof(mreq)); | 667 | memset(&mreq, 0, sizeof(mreq)); |
633 | if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq))) | 668 | if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq))) |
634 | break; | 669 | break; |
635 | } | 670 | } |
636 | 671 | ||
@@ -808,7 +843,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
808 | err = -ENOBUFS; | 843 | err = -ENOBUFS; |
809 | break; | 844 | break; |
810 | } | 845 | } |
811 | gsf = kmalloc(optlen,GFP_KERNEL); | 846 | gsf = kmalloc(optlen, GFP_KERNEL); |
812 | if (!gsf) { | 847 | if (!gsf) { |
813 | err = -ENOBUFS; | 848 | err = -ENOBUFS; |
814 | break; | 849 | break; |
@@ -828,7 +863,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
828 | goto mc_msf_out; | 863 | goto mc_msf_out; |
829 | } | 864 | } |
830 | msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); | 865 | msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); |
831 | msf = kmalloc(msize,GFP_KERNEL); | 866 | msf = kmalloc(msize, GFP_KERNEL); |
832 | if (!msf) { | 867 | if (!msf) { |
833 | err = -ENOBUFS; | 868 | err = -ENOBUFS; |
834 | goto mc_msf_out; | 869 | goto mc_msf_out; |
@@ -971,9 +1006,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, | |||
971 | return -EOPNOTSUPP; | 1006 | return -EOPNOTSUPP; |
972 | 1007 | ||
973 | if (ip_mroute_opt(optname)) | 1008 | if (ip_mroute_opt(optname)) |
974 | return ip_mroute_getsockopt(sk,optname,optval,optlen); | 1009 | return ip_mroute_getsockopt(sk, optname, optval, optlen); |
975 | 1010 | ||
976 | if (get_user(len,optlen)) | 1011 | if (get_user(len, optlen)) |
977 | return -EFAULT; | 1012 | return -EFAULT; |
978 | if (len < 0) | 1013 | if (len < 0) |
979 | return -EINVAL; | 1014 | return -EINVAL; |
@@ -984,7 +1019,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, | |||
984 | case IP_OPTIONS: | 1019 | case IP_OPTIONS: |
985 | { | 1020 | { |
986 | unsigned char optbuf[sizeof(struct ip_options)+40]; | 1021 | unsigned char optbuf[sizeof(struct ip_options)+40]; |
987 | struct ip_options * opt = (struct ip_options*)optbuf; | 1022 | struct ip_options * opt = (struct ip_options *)optbuf; |
988 | opt->optlen = 0; | 1023 | opt->optlen = 0; |
989 | if (inet->opt) | 1024 | if (inet->opt) |
990 | memcpy(optbuf, inet->opt, | 1025 | memcpy(optbuf, inet->opt, |
@@ -1022,6 +1057,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, | |||
1022 | case IP_PASSSEC: | 1057 | case IP_PASSSEC: |
1023 | val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; | 1058 | val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; |
1024 | break; | 1059 | break; |
1060 | case IP_RECVORIGDSTADDR: | ||
1061 | val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; | ||
1062 | break; | ||
1025 | case IP_TOS: | 1063 | case IP_TOS: |
1026 | val = inet->tos; | 1064 | val = inet->tos; |
1027 | break; | 1065 | break; |
@@ -1154,13 +1192,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, | |||
1154 | len = 1; | 1192 | len = 1; |
1155 | if (put_user(len, optlen)) | 1193 | if (put_user(len, optlen)) |
1156 | return -EFAULT; | 1194 | return -EFAULT; |
1157 | if (copy_to_user(optval,&ucval,1)) | 1195 | if (copy_to_user(optval, &ucval, 1)) |
1158 | return -EFAULT; | 1196 | return -EFAULT; |
1159 | } else { | 1197 | } else { |
1160 | len = min_t(unsigned int, sizeof(int), len); | 1198 | len = min_t(unsigned int, sizeof(int), len); |
1161 | if (put_user(len, optlen)) | 1199 | if (put_user(len, optlen)) |
1162 | return -EFAULT; | 1200 | return -EFAULT; |
1163 | if (copy_to_user(optval,&val,len)) | 1201 | if (copy_to_user(optval, &val, len)) |
1164 | return -EFAULT; | 1202 | return -EFAULT; |
1165 | } | 1203 | } |
1166 | return 0; | 1204 | return 0; |
@@ -1178,7 +1216,7 @@ int ip_getsockopt(struct sock *sk, int level, | |||
1178 | !ip_mroute_opt(optname)) { | 1216 | !ip_mroute_opt(optname)) { |
1179 | int len; | 1217 | int len; |
1180 | 1218 | ||
1181 | if (get_user(len,optlen)) | 1219 | if (get_user(len, optlen)) |
1182 | return -EFAULT; | 1220 | return -EFAULT; |
1183 | 1221 | ||
1184 | lock_sock(sk); | 1222 | lock_sock(sk); |
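
The IP_RECVORIGDSTADDR hunks above only flip the IP_CMSG_ORIGDSTADDR bit in inet->cmsg_flags; the sockaddr itself is attached per packet by the cmsg receive path. A minimal userspace sketch of consuming that ancillary data on a UDP socket follows; it is illustrative only (recv_with_origdst is a made-up helper, error handling is trimmed, and the IP_ORIGDSTADDR/IP_RECVORIGDSTADDR values are taken from linux/in.h as of this change):

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    #ifndef IP_RECVORIGDSTADDR
    #define IP_ORIGDSTADDR     20	/* value from linux/in.h at this change */
    #define IP_RECVORIGDSTADDR IP_ORIGDSTADDR
    #endif

    static void recv_with_origdst(int fd)
    {
    	int on = 1;
    	char data[2048];
    	char cbuf[CMSG_SPACE(sizeof(struct sockaddr_in))];
    	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
    	struct msghdr msg = {
    		.msg_iov = &iov, .msg_iovlen = 1,
    		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
    	};
    	struct cmsghdr *cmsg;

    	/* ask the kernel to attach the original destination address,
    	 * e.g. for transparently proxied/redirected UDP traffic */
    	setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &on, sizeof(on));

    	if (recvmsg(fd, &msg, 0) < 0)
    		return;

    	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
    		if (cmsg->cmsg_level == SOL_IP &&
    		    cmsg->cmsg_type == IP_ORIGDSTADDR) {
    			struct sockaddr_in orig;

    			memcpy(&orig, CMSG_DATA(cmsg), sizeof(orig));
    			printf("original dst %s:%u\n",
    			       inet_ntoa(orig.sin_addr),
    			       ntohs(orig.sin_port));
    		}
    	}
    }
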
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 38ccb6dfb02e..3262ce06294c 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
@@ -35,12 +35,12 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
35 | return; | 35 | return; |
36 | 36 | ||
37 | spi = htonl(ntohs(ipch->cpi)); | 37 | spi = htonl(ntohs(ipch->cpi)); |
38 | x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, | 38 | x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, |
39 | spi, IPPROTO_COMP, AF_INET); | 39 | spi, IPPROTO_COMP, AF_INET); |
40 | if (!x) | 40 | if (!x) |
41 | return; | 41 | return; |
42 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIPQUAD_FMT "\n", | 42 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", |
43 | spi, NIPQUAD(iph->daddr)); | 43 | spi, &iph->daddr); |
44 | xfrm_state_put(x); | 44 | xfrm_state_put(x); |
45 | } | 45 | } |
46 | 46 | ||
@@ -49,7 +49,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) | |||
49 | { | 49 | { |
50 | struct xfrm_state *t; | 50 | struct xfrm_state *t; |
51 | 51 | ||
52 | t = xfrm_state_alloc(); | 52 | t = xfrm_state_alloc(&init_net); |
53 | if (t == NULL) | 53 | if (t == NULL) |
54 | goto out; | 54 | goto out; |
55 | 55 | ||
@@ -85,7 +85,7 @@ static int ipcomp_tunnel_attach(struct xfrm_state *x) | |||
85 | int err = 0; | 85 | int err = 0; |
86 | struct xfrm_state *t; | 86 | struct xfrm_state *t; |
87 | 87 | ||
88 | t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4, | 88 | t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr.a4, |
89 | x->props.saddr.a4, IPPROTO_IPIP, AF_INET); | 89 | x->props.saddr.a4, IPPROTO_IPIP, AF_INET); |
90 | if (!t) { | 90 | if (!t) { |
91 | t = ipcomp_tunnel_create(x); | 91 | t = ipcomp_tunnel_create(x); |
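
These ipcomp4 hunks track the xfrm API growing a struct net argument: xfrm_state_lookup() and xfrm_state_alloc() are now namespace-aware, and this caller pins &init_net for the time being. A hedged sketch of the fully namespace-aware call pattern such a caller would eventually use (ipcomp_sa_for_skb is an illustrative helper, not from the patch, and it assumes skb->dev has been set by the receive path):

    #include <linux/in.h>
    #include <linux/netdevice.h>
    #include <net/xfrm.h>

    /* Sketch only: resolve the SA in the namespace the packet belongs to,
     * instead of hard-coding &init_net as the ipcomp4 callers above do. */
    static struct xfrm_state *ipcomp_sa_for_skb(struct sk_buff *skb,
    					    __be32 daddr, __be32 spi)
    {
    	struct net *net = dev_net(skb->dev);

    	return xfrm_state_lookup(net, (xfrm_address_t *)&daddr,
    				 spi, IPPROTO_COMP, AF_INET);
    }
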
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 42065fff46c4..42a0f3dd3fd6 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -374,7 +374,7 @@ static int __init ic_defaults(void) | |||
374 | */ | 374 | */ |
375 | 375 | ||
376 | if (!ic_host_name_set) | 376 | if (!ic_host_name_set) |
377 | sprintf(init_utsname()->nodename, NIPQUAD_FMT, NIPQUAD(ic_myaddr)); | 377 | sprintf(init_utsname()->nodename, "%pI4", &ic_myaddr); |
378 | 378 | ||
379 | if (root_server_addr == NONE) | 379 | if (root_server_addr == NONE) |
380 | root_server_addr = ic_servaddr; | 380 | root_server_addr = ic_servaddr; |
@@ -387,11 +387,11 @@ static int __init ic_defaults(void) | |||
387 | else if (IN_CLASSC(ntohl(ic_myaddr))) | 387 | else if (IN_CLASSC(ntohl(ic_myaddr))) |
388 | ic_netmask = htonl(IN_CLASSC_NET); | 388 | ic_netmask = htonl(IN_CLASSC_NET); |
389 | else { | 389 | else { |
390 | printk(KERN_ERR "IP-Config: Unable to guess netmask for address " NIPQUAD_FMT "\n", | 390 | printk(KERN_ERR "IP-Config: Unable to guess netmask for address %pI4\n", |
391 | NIPQUAD(ic_myaddr)); | 391 | &ic_myaddr); |
392 | return -1; | 392 | return -1; |
393 | } | 393 | } |
394 | printk("IP-Config: Guessing netmask " NIPQUAD_FMT "\n", NIPQUAD(ic_netmask)); | 394 | printk("IP-Config: Guessing netmask %pI4\n", &ic_netmask); |
395 | } | 395 | } |
396 | 396 | ||
397 | return 0; | 397 | return 0; |
@@ -979,10 +979,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
979 | ic_myaddr = b->your_ip; | 979 | ic_myaddr = b->your_ip; |
980 | ic_servaddr = server_id; | 980 | ic_servaddr = server_id; |
981 | #ifdef IPCONFIG_DEBUG | 981 | #ifdef IPCONFIG_DEBUG |
982 | printk("DHCP: Offered address " NIPQUAD_FMT, | 982 | printk("DHCP: Offered address %pI4 by server %pI4\n", |
983 | NIPQUAD(ic_myaddr)); | 983 | &ic_myaddr, &ic_servaddr); |
984 | printk(" by server " NIPQUAD_FMT "\n", | ||
985 | NIPQUAD(ic_servaddr)); | ||
986 | #endif | 984 | #endif |
987 | /* The DHCP indicated server address takes | 985 | /* The DHCP indicated server address takes |
988 | * precedence over the bootp header one if | 986 | * precedence over the bootp header one if |
@@ -1177,11 +1175,11 @@ static int __init ic_dynamic(void) | |||
1177 | return -1; | 1175 | return -1; |
1178 | } | 1176 | } |
1179 | 1177 | ||
1180 | printk("IP-Config: Got %s answer from " NIPQUAD_FMT ", ", | 1178 | printk("IP-Config: Got %s answer from %pI4, ", |
1181 | ((ic_got_reply & IC_RARP) ? "RARP" | 1179 | ((ic_got_reply & IC_RARP) ? "RARP" |
1182 | : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), | 1180 | : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), |
1183 | NIPQUAD(ic_servaddr)); | 1181 | &ic_servaddr); |
1184 | printk("my address is " NIPQUAD_FMT "\n", NIPQUAD(ic_myaddr)); | 1182 | printk("my address is %pI4\n", &ic_myaddr); |
1185 | 1183 | ||
1186 | return 0; | 1184 | return 0; |
1187 | } | 1185 | } |
@@ -1206,14 +1204,12 @@ static int pnp_seq_show(struct seq_file *seq, void *v) | |||
1206 | "domain %s\n", ic_domain); | 1204 | "domain %s\n", ic_domain); |
1207 | for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { | 1205 | for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { |
1208 | if (ic_nameservers[i] != NONE) | 1206 | if (ic_nameservers[i] != NONE) |
1209 | seq_printf(seq, | 1207 | seq_printf(seq, "nameserver %pI4\n", |
1210 | "nameserver " NIPQUAD_FMT "\n", | 1208 | &ic_nameservers[i]); |
1211 | NIPQUAD(ic_nameservers[i])); | ||
1212 | } | 1209 | } |
1213 | if (ic_servaddr != NONE) | 1210 | if (ic_servaddr != NONE) |
1214 | seq_printf(seq, | 1211 | seq_printf(seq, "bootserver %pI4\n", |
1215 | "bootserver " NIPQUAD_FMT "\n", | 1212 | &ic_servaddr); |
1216 | NIPQUAD(ic_servaddr)); | ||
1217 | return 0; | 1213 | return 0; |
1218 | } | 1214 | } |
1219 | 1215 | ||
@@ -1387,13 +1383,13 @@ static int __init ip_auto_config(void) | |||
1387 | */ | 1383 | */ |
1388 | printk("IP-Config: Complete:"); | 1384 | printk("IP-Config: Complete:"); |
1389 | printk("\n device=%s", ic_dev->name); | 1385 | printk("\n device=%s", ic_dev->name); |
1390 | printk(", addr=" NIPQUAD_FMT, NIPQUAD(ic_myaddr)); | 1386 | printk(", addr=%pI4", &ic_myaddr); |
1391 | printk(", mask=" NIPQUAD_FMT, NIPQUAD(ic_netmask)); | 1387 | printk(", mask=%pI4", &ic_netmask); |
1392 | printk(", gw=" NIPQUAD_FMT, NIPQUAD(ic_gateway)); | 1388 | printk(", gw=%pI4", &ic_gateway); |
1393 | printk(",\n host=%s, domain=%s, nis-domain=%s", | 1389 | printk(",\n host=%s, domain=%s, nis-domain=%s", |
1394 | utsname()->nodename, ic_domain, utsname()->domainname); | 1390 | utsname()->nodename, ic_domain, utsname()->domainname); |
1395 | printk(",\n bootserver=" NIPQUAD_FMT, NIPQUAD(ic_servaddr)); | 1391 | printk(",\n bootserver=%pI4", &ic_servaddr); |
1396 | printk(", rootserver=" NIPQUAD_FMT, NIPQUAD(root_server_addr)); | 1392 | printk(", rootserver=%pI4", &root_server_addr); |
1397 | printk(", rootpath=%s", root_server_path); | 1393 | printk(", rootpath=%s", root_server_path); |
1398 | printk("\n"); | 1394 | printk("\n"); |
1399 | #endif /* !SILENT */ | 1395 | #endif /* !SILENT */ |
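
The ipconfig.c changes are part of a tree-wide sweep from the NIPQUAD_FMT/NIPQUAD macro pair to the %pI4 printk extension. The key difference is the calling convention: %pI4 takes a single pointer to a network-byte-order __be32, not four expanded byte values. A small sketch of the two styles side by side (print_addr_example is illustrative, not from the patch):

    #include <linux/kernel.h>
    #include <linux/types.h>

    static void print_addr_example(__be32 addr)
    {
    	/* old style, removed throughout this diff:
    	 *	printk("addr=" NIPQUAD_FMT "\n", NIPQUAD(addr));
    	 * new style: pass a pointer to the big-endian word.
    	 */
    	printk(KERN_DEBUG "addr=%pI4\n", &addr);
    }
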
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 29609d29df76..5079dfbc6f38 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -130,8 +130,8 @@ struct ipip_net { | |||
130 | struct net_device *fb_tunnel_dev; | 130 | struct net_device *fb_tunnel_dev; |
131 | }; | 131 | }; |
132 | 132 | ||
133 | static int ipip_fb_tunnel_init(struct net_device *dev); | 133 | static void ipip_fb_tunnel_init(struct net_device *dev); |
134 | static int ipip_tunnel_init(struct net_device *dev); | 134 | static void ipip_tunnel_init(struct net_device *dev); |
135 | static void ipip_tunnel_setup(struct net_device *dev); | 135 | static void ipip_tunnel_setup(struct net_device *dev); |
136 | 136 | ||
137 | static DEFINE_RWLOCK(ipip_lock); | 137 | static DEFINE_RWLOCK(ipip_lock); |
@@ -245,9 +245,10 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, | |||
245 | } | 245 | } |
246 | 246 | ||
247 | nt = netdev_priv(dev); | 247 | nt = netdev_priv(dev); |
248 | dev->init = ipip_tunnel_init; | ||
249 | nt->parms = *parms; | 248 | nt->parms = *parms; |
250 | 249 | ||
250 | ipip_tunnel_init(dev); | ||
251 | |||
251 | if (register_netdevice(dev) < 0) | 252 | if (register_netdevice(dev) < 0) |
252 | goto failed_free; | 253 | goto failed_free; |
253 | 254 | ||
@@ -281,7 +282,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
281 | 8 bytes of packet payload. It means, that precise relaying of | 282 | 8 bytes of packet payload. It means, that precise relaying of |
282 | ICMP in the real Internet is absolutely infeasible. | 283 | ICMP in the real Internet is absolutely infeasible. |
283 | */ | 284 | */ |
284 | struct iphdr *iph = (struct iphdr*)skb->data; | 285 | struct iphdr *iph = (struct iphdr *)skb->data; |
285 | const int type = icmp_hdr(skb)->type; | 286 | const int type = icmp_hdr(skb)->type; |
286 | const int code = icmp_hdr(skb)->code; | 287 | const int code = icmp_hdr(skb)->code; |
287 | struct ip_tunnel *t; | 288 | struct ip_tunnel *t; |
@@ -691,12 +692,17 @@ static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) | |||
691 | return 0; | 692 | return 0; |
692 | } | 693 | } |
693 | 694 | ||
695 | static const struct net_device_ops ipip_netdev_ops = { | ||
696 | .ndo_uninit = ipip_tunnel_uninit, | ||
697 | .ndo_start_xmit = ipip_tunnel_xmit, | ||
698 | .ndo_do_ioctl = ipip_tunnel_ioctl, | ||
699 | .ndo_change_mtu = ipip_tunnel_change_mtu, | ||
700 | |||
701 | }; | ||
702 | |||
694 | static void ipip_tunnel_setup(struct net_device *dev) | 703 | static void ipip_tunnel_setup(struct net_device *dev) |
695 | { | 704 | { |
696 | dev->uninit = ipip_tunnel_uninit; | 705 | dev->netdev_ops = &ipip_netdev_ops; |
697 | dev->hard_start_xmit = ipip_tunnel_xmit; | ||
698 | dev->do_ioctl = ipip_tunnel_ioctl; | ||
699 | dev->change_mtu = ipip_tunnel_change_mtu; | ||
700 | dev->destructor = free_netdev; | 706 | dev->destructor = free_netdev; |
701 | 707 | ||
702 | dev->type = ARPHRD_TUNNEL; | 708 | dev->type = ARPHRD_TUNNEL; |
@@ -708,11 +714,9 @@ static void ipip_tunnel_setup(struct net_device *dev) | |||
708 | dev->features |= NETIF_F_NETNS_LOCAL; | 714 | dev->features |= NETIF_F_NETNS_LOCAL; |
709 | } | 715 | } |
710 | 716 | ||
711 | static int ipip_tunnel_init(struct net_device *dev) | 717 | static void ipip_tunnel_init(struct net_device *dev) |
712 | { | 718 | { |
713 | struct ip_tunnel *tunnel; | 719 | struct ip_tunnel *tunnel = netdev_priv(dev); |
714 | |||
715 | tunnel = netdev_priv(dev); | ||
716 | 720 | ||
717 | tunnel->dev = dev; | 721 | tunnel->dev = dev; |
718 | strcpy(tunnel->parms.name, dev->name); | 722 | strcpy(tunnel->parms.name, dev->name); |
@@ -721,11 +725,9 @@ static int ipip_tunnel_init(struct net_device *dev) | |||
721 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); | 725 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); |
722 | 726 | ||
723 | ipip_tunnel_bind_dev(dev); | 727 | ipip_tunnel_bind_dev(dev); |
724 | |||
725 | return 0; | ||
726 | } | 728 | } |
727 | 729 | ||
728 | static int ipip_fb_tunnel_init(struct net_device *dev) | 730 | static void ipip_fb_tunnel_init(struct net_device *dev) |
729 | { | 731 | { |
730 | struct ip_tunnel *tunnel = netdev_priv(dev); | 732 | struct ip_tunnel *tunnel = netdev_priv(dev); |
731 | struct iphdr *iph = &tunnel->parms.iph; | 733 | struct iphdr *iph = &tunnel->parms.iph; |
@@ -740,7 +742,6 @@ static int ipip_fb_tunnel_init(struct net_device *dev) | |||
740 | 742 | ||
741 | dev_hold(dev); | 743 | dev_hold(dev); |
742 | ipn->tunnels_wc[0] = tunnel; | 744 | ipn->tunnels_wc[0] = tunnel; |
743 | return 0; | ||
744 | } | 745 | } |
745 | 746 | ||
746 | static struct xfrm_tunnel ipip_handler = { | 747 | static struct xfrm_tunnel ipip_handler = { |
@@ -792,10 +793,10 @@ static int ipip_init_net(struct net *net) | |||
792 | err = -ENOMEM; | 793 | err = -ENOMEM; |
793 | goto err_alloc_dev; | 794 | goto err_alloc_dev; |
794 | } | 795 | } |
795 | |||
796 | ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init; | ||
797 | dev_net_set(ipn->fb_tunnel_dev, net); | 796 | dev_net_set(ipn->fb_tunnel_dev, net); |
798 | 797 | ||
798 | ipip_fb_tunnel_init(ipn->fb_tunnel_dev); | ||
799 | |||
799 | if ((err = register_netdev(ipn->fb_tunnel_dev))) | 800 | if ((err = register_netdev(ipn->fb_tunnel_dev))) |
800 | goto err_reg_dev; | 801 | goto err_reg_dev; |
801 | 802 | ||
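
ipip.c is converted from per-device callback pointers (dev->hard_start_xmit and friends) to a single shared const struct net_device_ops, and the removed dev->init hook is replaced by calling the init function directly before register_netdevice(). The general shape of the conversion, sketched for a hypothetical driver (the mydrv_* names are illustrative only):

    #include <linux/netdevice.h>

    static int mydrv_xmit(struct sk_buff *skb, struct net_device *dev);
    static int mydrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);

    /* one read-only ops table shared by every instance of the device,
     * replacing per-device writable function pointers */
    static const struct net_device_ops mydrv_netdev_ops = {
    	.ndo_start_xmit	= mydrv_xmit,
    	.ndo_do_ioctl	= mydrv_ioctl,
    };

    static void mydrv_setup(struct net_device *dev)
    {
    	/* before the conversion these were separate fields:
    	 *	dev->hard_start_xmit = mydrv_xmit;
    	 *	dev->do_ioctl        = mydrv_ioctl;
    	 */
    	dev->netdev_ops = &mydrv_netdev_ops;
    	dev->destructor = free_netdev;
    }
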
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 25924b1eb2ef..14666449dc1c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -124,8 +124,8 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) | |||
124 | 124 | ||
125 | dev = __dev_get_by_name(&init_net, "tunl0"); | 125 | dev = __dev_get_by_name(&init_net, "tunl0"); |
126 | if (dev) { | 126 | if (dev) { |
127 | const struct net_device_ops *ops = dev->netdev_ops; | ||
127 | struct ifreq ifr; | 128 | struct ifreq ifr; |
128 | mm_segment_t oldfs; | ||
129 | struct ip_tunnel_parm p; | 129 | struct ip_tunnel_parm p; |
130 | 130 | ||
131 | memset(&p, 0, sizeof(p)); | 131 | memset(&p, 0, sizeof(p)); |
@@ -137,9 +137,13 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) | |||
137 | sprintf(p.name, "dvmrp%d", v->vifc_vifi); | 137 | sprintf(p.name, "dvmrp%d", v->vifc_vifi); |
138 | ifr.ifr_ifru.ifru_data = (__force void __user *)&p; | 138 | ifr.ifr_ifru.ifru_data = (__force void __user *)&p; |
139 | 139 | ||
140 | oldfs = get_fs(); set_fs(KERNEL_DS); | 140 | if (ops->ndo_do_ioctl) { |
141 | dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL); | 141 | mm_segment_t oldfs = get_fs(); |
142 | set_fs(oldfs); | 142 | |
143 | set_fs(KERNEL_DS); | ||
144 | ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); | ||
145 | set_fs(oldfs); | ||
146 | } | ||
143 | } | 147 | } |
144 | } | 148 | } |
145 | 149 | ||
@@ -151,9 +155,9 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) | |||
151 | dev = __dev_get_by_name(&init_net, "tunl0"); | 155 | dev = __dev_get_by_name(&init_net, "tunl0"); |
152 | 156 | ||
153 | if (dev) { | 157 | if (dev) { |
158 | const struct net_device_ops *ops = dev->netdev_ops; | ||
154 | int err; | 159 | int err; |
155 | struct ifreq ifr; | 160 | struct ifreq ifr; |
156 | mm_segment_t oldfs; | ||
157 | struct ip_tunnel_parm p; | 161 | struct ip_tunnel_parm p; |
158 | struct in_device *in_dev; | 162 | struct in_device *in_dev; |
159 | 163 | ||
@@ -166,9 +170,14 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) | |||
166 | sprintf(p.name, "dvmrp%d", v->vifc_vifi); | 170 | sprintf(p.name, "dvmrp%d", v->vifc_vifi); |
167 | ifr.ifr_ifru.ifru_data = (__force void __user *)&p; | 171 | ifr.ifr_ifru.ifru_data = (__force void __user *)&p; |
168 | 172 | ||
169 | oldfs = get_fs(); set_fs(KERNEL_DS); | 173 | if (ops->ndo_do_ioctl) { |
170 | err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); | 174 | mm_segment_t oldfs = get_fs(); |
171 | set_fs(oldfs); | 175 | |
176 | set_fs(KERNEL_DS); | ||
177 | err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); | ||
178 | set_fs(oldfs); | ||
179 | } else | ||
180 | err = -EOPNOTSUPP; | ||
172 | 181 | ||
173 | dev = NULL; | 182 | dev = NULL; |
174 | 183 | ||
@@ -213,12 +222,16 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) | |||
213 | return 0; | 222 | return 0; |
214 | } | 223 | } |
215 | 224 | ||
225 | static const struct net_device_ops reg_vif_netdev_ops = { | ||
226 | .ndo_start_xmit = reg_vif_xmit, | ||
227 | }; | ||
228 | |||
216 | static void reg_vif_setup(struct net_device *dev) | 229 | static void reg_vif_setup(struct net_device *dev) |
217 | { | 230 | { |
218 | dev->type = ARPHRD_PIMREG; | 231 | dev->type = ARPHRD_PIMREG; |
219 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; | 232 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; |
220 | dev->flags = IFF_NOARP; | 233 | dev->flags = IFF_NOARP; |
221 | dev->hard_start_xmit = reg_vif_xmit; | 234 | dev->netdev_ops = &reg_vif_netdev_ops; |
222 | dev->destructor = free_netdev; | 235 | dev->destructor = free_netdev; |
223 | } | 236 | } |
224 | 237 | ||
@@ -331,7 +344,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c) | |||
331 | 344 | ||
332 | atomic_dec(&cache_resolve_queue_len); | 345 | atomic_dec(&cache_resolve_queue_len); |
333 | 346 | ||
334 | while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) { | 347 | while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { |
335 | if (ip_hdr(skb)->version == 0) { | 348 | if (ip_hdr(skb)->version == 0) { |
336 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); | 349 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); |
337 | nlh->nlmsg_type = NLMSG_ERROR; | 350 | nlh->nlmsg_type = NLMSG_ERROR; |
@@ -477,13 +490,13 @@ static int vif_add(struct vifctl *vifc, int mrtsock) | |||
477 | /* | 490 | /* |
478 | * Fill in the VIF structures | 491 | * Fill in the VIF structures |
479 | */ | 492 | */ |
480 | v->rate_limit=vifc->vifc_rate_limit; | 493 | v->rate_limit = vifc->vifc_rate_limit; |
481 | v->local=vifc->vifc_lcl_addr.s_addr; | 494 | v->local = vifc->vifc_lcl_addr.s_addr; |
482 | v->remote=vifc->vifc_rmt_addr.s_addr; | 495 | v->remote = vifc->vifc_rmt_addr.s_addr; |
483 | v->flags=vifc->vifc_flags; | 496 | v->flags = vifc->vifc_flags; |
484 | if (!mrtsock) | 497 | if (!mrtsock) |
485 | v->flags |= VIFF_STATIC; | 498 | v->flags |= VIFF_STATIC; |
486 | v->threshold=vifc->vifc_threshold; | 499 | v->threshold = vifc->vifc_threshold; |
487 | v->bytes_in = 0; | 500 | v->bytes_in = 0; |
488 | v->bytes_out = 0; | 501 | v->bytes_out = 0; |
489 | v->pkt_in = 0; | 502 | v->pkt_in = 0; |
@@ -494,7 +507,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) | |||
494 | 507 | ||
495 | /* And finish update writing critical data */ | 508 | /* And finish update writing critical data */ |
496 | write_lock_bh(&mrt_lock); | 509 | write_lock_bh(&mrt_lock); |
497 | v->dev=dev; | 510 | v->dev = dev; |
498 | #ifdef CONFIG_IP_PIMSM | 511 | #ifdef CONFIG_IP_PIMSM |
499 | if (v->flags&VIFF_REGISTER) | 512 | if (v->flags&VIFF_REGISTER) |
500 | reg_vif_num = vifi; | 513 | reg_vif_num = vifi; |
@@ -507,7 +520,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) | |||
507 | 520 | ||
508 | static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) | 521 | static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) |
509 | { | 522 | { |
510 | int line=MFC_HASH(mcastgrp,origin); | 523 | int line = MFC_HASH(mcastgrp, origin); |
511 | struct mfc_cache *c; | 524 | struct mfc_cache *c; |
512 | 525 | ||
513 | for (c=mfc_cache_array[line]; c; c = c->next) { | 526 | for (c=mfc_cache_array[line]; c; c = c->next) { |
@@ -522,8 +535,8 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) | |||
522 | */ | 535 | */ |
523 | static struct mfc_cache *ipmr_cache_alloc(void) | 536 | static struct mfc_cache *ipmr_cache_alloc(void) |
524 | { | 537 | { |
525 | struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); | 538 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); |
526 | if (c==NULL) | 539 | if (c == NULL) |
527 | return NULL; | 540 | return NULL; |
528 | c->mfc_un.res.minvif = MAXVIFS; | 541 | c->mfc_un.res.minvif = MAXVIFS; |
529 | return c; | 542 | return c; |
@@ -531,8 +544,8 @@ static struct mfc_cache *ipmr_cache_alloc(void) | |||
531 | 544 | ||
532 | static struct mfc_cache *ipmr_cache_alloc_unres(void) | 545 | static struct mfc_cache *ipmr_cache_alloc_unres(void) |
533 | { | 546 | { |
534 | struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); | 547 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); |
535 | if (c==NULL) | 548 | if (c == NULL) |
536 | return NULL; | 549 | return NULL; |
537 | skb_queue_head_init(&c->mfc_un.unres.unresolved); | 550 | skb_queue_head_init(&c->mfc_un.unres.unresolved); |
538 | c->mfc_un.unres.expires = jiffies + 10*HZ; | 551 | c->mfc_un.unres.expires = jiffies + 10*HZ; |
@@ -552,7 +565,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | |||
552 | * Play the pending entries through our router | 565 | * Play the pending entries through our router |
553 | */ | 566 | */ |
554 | 567 | ||
555 | while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { | 568 | while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { |
556 | if (ip_hdr(skb)->version == 0) { | 569 | if (ip_hdr(skb)->version == 0) { |
557 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); | 570 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); |
558 | 571 | ||
@@ -637,7 +650,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | |||
637 | * Add our header | 650 | * Add our header |
638 | */ | 651 | */ |
639 | 652 | ||
640 | igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr)); | 653 | igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); |
641 | igmp->type = | 654 | igmp->type = |
642 | msg->im_msgtype = assert; | 655 | msg->im_msgtype = assert; |
643 | igmp->code = 0; | 656 | igmp->code = 0; |
@@ -653,7 +666,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | |||
653 | /* | 666 | /* |
654 | * Deliver to mrouted | 667 | * Deliver to mrouted |
655 | */ | 668 | */ |
656 | if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) { | 669 | if ((ret = sock_queue_rcv_skb(mroute_socket, skb))<0) { |
657 | if (net_ratelimit()) | 670 | if (net_ratelimit()) |
658 | printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); | 671 | printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); |
659 | kfree_skb(skb); | 672 | kfree_skb(skb); |
@@ -685,7 +698,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) | |||
685 | * Create a new entry if allowable | 698 | * Create a new entry if allowable |
686 | */ | 699 | */ |
687 | 700 | ||
688 | if (atomic_read(&cache_resolve_queue_len)>=10 || | 701 | if (atomic_read(&cache_resolve_queue_len) >= 10 || |
689 | (c=ipmr_cache_alloc_unres())==NULL) { | 702 | (c=ipmr_cache_alloc_unres())==NULL) { |
690 | spin_unlock_bh(&mfc_unres_lock); | 703 | spin_unlock_bh(&mfc_unres_lock); |
691 | 704 | ||
@@ -728,7 +741,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) | |||
728 | kfree_skb(skb); | 741 | kfree_skb(skb); |
729 | err = -ENOBUFS; | 742 | err = -ENOBUFS; |
730 | } else { | 743 | } else { |
731 | skb_queue_tail(&c->mfc_un.unres.unresolved,skb); | 744 | skb_queue_tail(&c->mfc_un.unres.unresolved, skb); |
732 | err = 0; | 745 | err = 0; |
733 | } | 746 | } |
734 | 747 | ||
@@ -745,7 +758,7 @@ static int ipmr_mfc_delete(struct mfcctl *mfc) | |||
745 | int line; | 758 | int line; |
746 | struct mfc_cache *c, **cp; | 759 | struct mfc_cache *c, **cp; |
747 | 760 | ||
748 | line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); | 761 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); |
749 | 762 | ||
750 | for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { | 763 | for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { |
751 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && | 764 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && |
@@ -766,7 +779,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) | |||
766 | int line; | 779 | int line; |
767 | struct mfc_cache *uc, *c, **cp; | 780 | struct mfc_cache *uc, *c, **cp; |
768 | 781 | ||
769 | line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); | 782 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); |
770 | 783 | ||
771 | for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { | 784 | for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { |
772 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && | 785 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && |
@@ -787,13 +800,13 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) | |||
787 | if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) | 800 | if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) |
788 | return -EINVAL; | 801 | return -EINVAL; |
789 | 802 | ||
790 | c=ipmr_cache_alloc(); | 803 | c = ipmr_cache_alloc(); |
791 | if (c==NULL) | 804 | if (c == NULL) |
792 | return -ENOMEM; | 805 | return -ENOMEM; |
793 | 806 | ||
794 | c->mfc_origin=mfc->mfcc_origin.s_addr; | 807 | c->mfc_origin = mfc->mfcc_origin.s_addr; |
795 | c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; | 808 | c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; |
796 | c->mfc_parent=mfc->mfcc_parent; | 809 | c->mfc_parent = mfc->mfcc_parent; |
797 | ipmr_update_thresholds(c, mfc->mfcc_ttls); | 810 | ipmr_update_thresholds(c, mfc->mfcc_ttls); |
798 | if (!mrtsock) | 811 | if (!mrtsock) |
799 | c->mfc_flags |= MFC_STATIC; | 812 | c->mfc_flags |= MFC_STATIC; |
@@ -846,7 +859,7 @@ static void mroute_clean_tables(struct sock *sk) | |||
846 | /* | 859 | /* |
847 | * Wipe the cache | 860 | * Wipe the cache |
848 | */ | 861 | */ |
849 | for (i=0;i<MFC_LINES;i++) { | 862 | for (i=0; i<MFC_LINES; i++) { |
850 | struct mfc_cache *c, **cp; | 863 | struct mfc_cache *c, **cp; |
851 | 864 | ||
852 | cp = &mfc_cache_array[i]; | 865 | cp = &mfc_cache_array[i]; |
@@ -887,7 +900,7 @@ static void mrtsock_destruct(struct sock *sk) | |||
887 | IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--; | 900 | IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--; |
888 | 901 | ||
889 | write_lock_bh(&mrt_lock); | 902 | write_lock_bh(&mrt_lock); |
890 | mroute_socket=NULL; | 903 | mroute_socket = NULL; |
891 | write_unlock_bh(&mrt_lock); | 904 | write_unlock_bh(&mrt_lock); |
892 | 905 | ||
893 | mroute_clean_tables(sk); | 906 | mroute_clean_tables(sk); |
@@ -902,7 +915,7 @@ static void mrtsock_destruct(struct sock *sk) | |||
902 | * MOSPF/PIM router set up we can clean this up. | 915 | * MOSPF/PIM router set up we can clean this up. |
903 | */ | 916 | */ |
904 | 917 | ||
905 | int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen) | 918 | int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen) |
906 | { | 919 | { |
907 | int ret; | 920 | int ret; |
908 | struct vifctl vif; | 921 | struct vifctl vif; |
@@ -918,7 +931,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt | |||
918 | if (sk->sk_type != SOCK_RAW || | 931 | if (sk->sk_type != SOCK_RAW || |
919 | inet_sk(sk)->num != IPPROTO_IGMP) | 932 | inet_sk(sk)->num != IPPROTO_IGMP) |
920 | return -EOPNOTSUPP; | 933 | return -EOPNOTSUPP; |
921 | if (optlen!=sizeof(int)) | 934 | if (optlen != sizeof(int)) |
922 | return -ENOPROTOOPT; | 935 | return -ENOPROTOOPT; |
923 | 936 | ||
924 | rtnl_lock(); | 937 | rtnl_lock(); |
@@ -930,7 +943,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt | |||
930 | ret = ip_ra_control(sk, 1, mrtsock_destruct); | 943 | ret = ip_ra_control(sk, 1, mrtsock_destruct); |
931 | if (ret == 0) { | 944 | if (ret == 0) { |
932 | write_lock_bh(&mrt_lock); | 945 | write_lock_bh(&mrt_lock); |
933 | mroute_socket=sk; | 946 | mroute_socket = sk; |
934 | write_unlock_bh(&mrt_lock); | 947 | write_unlock_bh(&mrt_lock); |
935 | 948 | ||
936 | IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++; | 949 | IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++; |
@@ -938,19 +951,19 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt | |||
938 | rtnl_unlock(); | 951 | rtnl_unlock(); |
939 | return ret; | 952 | return ret; |
940 | case MRT_DONE: | 953 | case MRT_DONE: |
941 | if (sk!=mroute_socket) | 954 | if (sk != mroute_socket) |
942 | return -EACCES; | 955 | return -EACCES; |
943 | return ip_ra_control(sk, 0, NULL); | 956 | return ip_ra_control(sk, 0, NULL); |
944 | case MRT_ADD_VIF: | 957 | case MRT_ADD_VIF: |
945 | case MRT_DEL_VIF: | 958 | case MRT_DEL_VIF: |
946 | if (optlen!=sizeof(vif)) | 959 | if (optlen != sizeof(vif)) |
947 | return -EINVAL; | 960 | return -EINVAL; |
948 | if (copy_from_user(&vif,optval,sizeof(vif))) | 961 | if (copy_from_user(&vif, optval, sizeof(vif))) |
949 | return -EFAULT; | 962 | return -EFAULT; |
950 | if (vif.vifc_vifi >= MAXVIFS) | 963 | if (vif.vifc_vifi >= MAXVIFS) |
951 | return -ENFILE; | 964 | return -ENFILE; |
952 | rtnl_lock(); | 965 | rtnl_lock(); |
953 | if (optname==MRT_ADD_VIF) { | 966 | if (optname == MRT_ADD_VIF) { |
954 | ret = vif_add(&vif, sk==mroute_socket); | 967 | ret = vif_add(&vif, sk==mroute_socket); |
955 | } else { | 968 | } else { |
956 | ret = vif_delete(vif.vifc_vifi, 0); | 969 | ret = vif_delete(vif.vifc_vifi, 0); |
@@ -964,12 +977,12 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt | |||
964 | */ | 977 | */ |
965 | case MRT_ADD_MFC: | 978 | case MRT_ADD_MFC: |
966 | case MRT_DEL_MFC: | 979 | case MRT_DEL_MFC: |
967 | if (optlen!=sizeof(mfc)) | 980 | if (optlen != sizeof(mfc)) |
968 | return -EINVAL; | 981 | return -EINVAL; |
969 | if (copy_from_user(&mfc,optval, sizeof(mfc))) | 982 | if (copy_from_user(&mfc, optval, sizeof(mfc))) |
970 | return -EFAULT; | 983 | return -EFAULT; |
971 | rtnl_lock(); | 984 | rtnl_lock(); |
972 | if (optname==MRT_DEL_MFC) | 985 | if (optname == MRT_DEL_MFC) |
973 | ret = ipmr_mfc_delete(&mfc); | 986 | ret = ipmr_mfc_delete(&mfc); |
974 | else | 987 | else |
975 | ret = ipmr_mfc_add(&mfc, sk==mroute_socket); | 988 | ret = ipmr_mfc_add(&mfc, sk==mroute_socket); |
@@ -1028,12 +1041,12 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt | |||
1028 | * Getsock opt support for the multicast routing system. | 1041 | * Getsock opt support for the multicast routing system. |
1029 | */ | 1042 | */ |
1030 | 1043 | ||
1031 | int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen) | 1044 | int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) |
1032 | { | 1045 | { |
1033 | int olr; | 1046 | int olr; |
1034 | int val; | 1047 | int val; |
1035 | 1048 | ||
1036 | if (optname!=MRT_VERSION && | 1049 | if (optname != MRT_VERSION && |
1037 | #ifdef CONFIG_IP_PIMSM | 1050 | #ifdef CONFIG_IP_PIMSM |
1038 | optname!=MRT_PIM && | 1051 | optname!=MRT_PIM && |
1039 | #endif | 1052 | #endif |
@@ -1047,17 +1060,17 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u | |||
1047 | if (olr < 0) | 1060 | if (olr < 0) |
1048 | return -EINVAL; | 1061 | return -EINVAL; |
1049 | 1062 | ||
1050 | if (put_user(olr,optlen)) | 1063 | if (put_user(olr, optlen)) |
1051 | return -EFAULT; | 1064 | return -EFAULT; |
1052 | if (optname==MRT_VERSION) | 1065 | if (optname == MRT_VERSION) |
1053 | val=0x0305; | 1066 | val = 0x0305; |
1054 | #ifdef CONFIG_IP_PIMSM | 1067 | #ifdef CONFIG_IP_PIMSM |
1055 | else if (optname==MRT_PIM) | 1068 | else if (optname == MRT_PIM) |
1056 | val=mroute_do_pim; | 1069 | val = mroute_do_pim; |
1057 | #endif | 1070 | #endif |
1058 | else | 1071 | else |
1059 | val=mroute_do_assert; | 1072 | val = mroute_do_assert; |
1060 | if (copy_to_user(optval,&val,olr)) | 1073 | if (copy_to_user(optval, &val, olr)) |
1061 | return -EFAULT; | 1074 | return -EFAULT; |
1062 | return 0; | 1075 | return 0; |
1063 | } | 1076 | } |
@@ -1075,27 +1088,27 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) | |||
1075 | 1088 | ||
1076 | switch (cmd) { | 1089 | switch (cmd) { |
1077 | case SIOCGETVIFCNT: | 1090 | case SIOCGETVIFCNT: |
1078 | if (copy_from_user(&vr,arg,sizeof(vr))) | 1091 | if (copy_from_user(&vr, arg, sizeof(vr))) |
1079 | return -EFAULT; | 1092 | return -EFAULT; |
1080 | if (vr.vifi>=maxvif) | 1093 | if (vr.vifi >= maxvif) |
1081 | return -EINVAL; | 1094 | return -EINVAL; |
1082 | read_lock(&mrt_lock); | 1095 | read_lock(&mrt_lock); |
1083 | vif=&vif_table[vr.vifi]; | 1096 | vif=&vif_table[vr.vifi]; |
1084 | if (VIF_EXISTS(vr.vifi)) { | 1097 | if (VIF_EXISTS(vr.vifi)) { |
1085 | vr.icount=vif->pkt_in; | 1098 | vr.icount = vif->pkt_in; |
1086 | vr.ocount=vif->pkt_out; | 1099 | vr.ocount = vif->pkt_out; |
1087 | vr.ibytes=vif->bytes_in; | 1100 | vr.ibytes = vif->bytes_in; |
1088 | vr.obytes=vif->bytes_out; | 1101 | vr.obytes = vif->bytes_out; |
1089 | read_unlock(&mrt_lock); | 1102 | read_unlock(&mrt_lock); |
1090 | 1103 | ||
1091 | if (copy_to_user(arg,&vr,sizeof(vr))) | 1104 | if (copy_to_user(arg, &vr, sizeof(vr))) |
1092 | return -EFAULT; | 1105 | return -EFAULT; |
1093 | return 0; | 1106 | return 0; |
1094 | } | 1107 | } |
1095 | read_unlock(&mrt_lock); | 1108 | read_unlock(&mrt_lock); |
1096 | return -EADDRNOTAVAIL; | 1109 | return -EADDRNOTAVAIL; |
1097 | case SIOCGETSGCNT: | 1110 | case SIOCGETSGCNT: |
1098 | if (copy_from_user(&sr,arg,sizeof(sr))) | 1111 | if (copy_from_user(&sr, arg, sizeof(sr))) |
1099 | return -EFAULT; | 1112 | return -EFAULT; |
1100 | 1113 | ||
1101 | read_lock(&mrt_lock); | 1114 | read_lock(&mrt_lock); |
@@ -1106,7 +1119,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) | |||
1106 | sr.wrong_if = c->mfc_un.res.wrong_if; | 1119 | sr.wrong_if = c->mfc_un.res.wrong_if; |
1107 | read_unlock(&mrt_lock); | 1120 | read_unlock(&mrt_lock); |
1108 | 1121 | ||
1109 | if (copy_to_user(arg,&sr,sizeof(sr))) | 1122 | if (copy_to_user(arg, &sr, sizeof(sr))) |
1110 | return -EFAULT; | 1123 | return -EFAULT; |
1111 | return 0; | 1124 | return 0; |
1112 | } | 1125 | } |
@@ -1130,15 +1143,15 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v | |||
1130 | if (event != NETDEV_UNREGISTER) | 1143 | if (event != NETDEV_UNREGISTER) |
1131 | return NOTIFY_DONE; | 1144 | return NOTIFY_DONE; |
1132 | v=&vif_table[0]; | 1145 | v=&vif_table[0]; |
1133 | for (ct=0;ct<maxvif;ct++,v++) { | 1146 | for (ct=0; ct<maxvif; ct++,v++) { |
1134 | if (v->dev==dev) | 1147 | if (v->dev == dev) |
1135 | vif_delete(ct, 1); | 1148 | vif_delete(ct, 1); |
1136 | } | 1149 | } |
1137 | return NOTIFY_DONE; | 1150 | return NOTIFY_DONE; |
1138 | } | 1151 | } |
1139 | 1152 | ||
1140 | 1153 | ||
1141 | static struct notifier_block ip_mr_notifier={ | 1154 | static struct notifier_block ip_mr_notifier = { |
1142 | .notifier_call = ipmr_device_event, | 1155 | .notifier_call = ipmr_device_event, |
1143 | }; | 1156 | }; |
1144 | 1157 | ||
@@ -1204,7 +1217,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | |||
1204 | #ifdef CONFIG_IP_PIMSM | 1217 | #ifdef CONFIG_IP_PIMSM |
1205 | if (vif->flags & VIFF_REGISTER) { | 1218 | if (vif->flags & VIFF_REGISTER) { |
1206 | vif->pkt_out++; | 1219 | vif->pkt_out++; |
1207 | vif->bytes_out+=skb->len; | 1220 | vif->bytes_out += skb->len; |
1208 | vif->dev->stats.tx_bytes += skb->len; | 1221 | vif->dev->stats.tx_bytes += skb->len; |
1209 | vif->dev->stats.tx_packets++; | 1222 | vif->dev->stats.tx_packets++; |
1210 | ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); | 1223 | ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); |
@@ -1254,7 +1267,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | |||
1254 | } | 1267 | } |
1255 | 1268 | ||
1256 | vif->pkt_out++; | 1269 | vif->pkt_out++; |
1257 | vif->bytes_out+=skb->len; | 1270 | vif->bytes_out += skb->len; |
1258 | 1271 | ||
1259 | dst_release(skb->dst); | 1272 | dst_release(skb->dst); |
1260 | skb->dst = &rt->u.dst; | 1273 | skb->dst = &rt->u.dst; |
@@ -1352,7 +1365,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local | |||
1352 | } | 1365 | } |
1353 | 1366 | ||
1354 | vif_table[vif].pkt_in++; | 1367 | vif_table[vif].pkt_in++; |
1355 | vif_table[vif].bytes_in+=skb->len; | 1368 | vif_table[vif].bytes_in += skb->len; |
1356 | 1369 | ||
1357 | /* | 1370 | /* |
1358 | * Forward the frame | 1371 | * Forward the frame |
@@ -1364,7 +1377,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local | |||
1364 | if (skb2) | 1377 | if (skb2) |
1365 | ipmr_queue_xmit(skb2, cache, psend); | 1378 | ipmr_queue_xmit(skb2, cache, psend); |
1366 | } | 1379 | } |
1367 | psend=ct; | 1380 | psend = ct; |
1368 | } | 1381 | } |
1369 | } | 1382 | } |
1370 | if (psend != -1) { | 1383 | if (psend != -1) { |
@@ -1428,7 +1441,7 @@ int ip_mr_input(struct sk_buff *skb) | |||
1428 | /* | 1441 | /* |
1429 | * No usable cache entry | 1442 | * No usable cache entry |
1430 | */ | 1443 | */ |
1431 | if (cache==NULL) { | 1444 | if (cache == NULL) { |
1432 | int vif; | 1445 | int vif; |
1433 | 1446 | ||
1434 | if (local) { | 1447 | if (local) { |
@@ -1469,29 +1482,13 @@ dont_forward: | |||
1469 | return 0; | 1482 | return 0; |
1470 | } | 1483 | } |
1471 | 1484 | ||
1472 | #ifdef CONFIG_IP_PIMSM_V1 | 1485 | #ifdef CONFIG_IP_PIMSM |
1473 | /* | 1486 | static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) |
1474 | * Handle IGMP messages of PIMv1 | ||
1475 | */ | ||
1476 | |||
1477 | int pim_rcv_v1(struct sk_buff * skb) | ||
1478 | { | 1487 | { |
1479 | struct igmphdr *pim; | 1488 | struct net_device *reg_dev = NULL; |
1480 | struct iphdr *encap; | 1489 | struct iphdr *encap; |
1481 | struct net_device *reg_dev = NULL; | ||
1482 | |||
1483 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) | ||
1484 | goto drop; | ||
1485 | 1490 | ||
1486 | pim = igmp_hdr(skb); | 1491 | encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); |
1487 | |||
1488 | if (!mroute_do_pim || | ||
1489 | skb->len < sizeof(*pim) + sizeof(*encap) || | ||
1490 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) | ||
1491 | goto drop; | ||
1492 | |||
1493 | encap = (struct iphdr *)(skb_transport_header(skb) + | ||
1494 | sizeof(struct igmphdr)); | ||
1495 | /* | 1492 | /* |
1496 | Check that: | 1493 | Check that: |
1497 | a. packet is really destined to a multicast group | 1494 | a. packet is really destined to a multicast group |
@@ -1500,8 +1497,8 @@ int pim_rcv_v1(struct sk_buff * skb) | |||
1500 | */ | 1497 | */ |
1501 | if (!ipv4_is_multicast(encap->daddr) || | 1498 | if (!ipv4_is_multicast(encap->daddr) || |
1502 | encap->tot_len == 0 || | 1499 | encap->tot_len == 0 || |
1503 | ntohs(encap->tot_len) + sizeof(*pim) > skb->len) | 1500 | ntohs(encap->tot_len) + pimlen > skb->len) |
1504 | goto drop; | 1501 | return 1; |
1505 | 1502 | ||
1506 | read_lock(&mrt_lock); | 1503 | read_lock(&mrt_lock); |
1507 | if (reg_vif_num >= 0) | 1504 | if (reg_vif_num >= 0) |
@@ -1511,7 +1508,7 @@ int pim_rcv_v1(struct sk_buff * skb) | |||
1511 | read_unlock(&mrt_lock); | 1508 | read_unlock(&mrt_lock); |
1512 | 1509 | ||
1513 | if (reg_dev == NULL) | 1510 | if (reg_dev == NULL) |
1514 | goto drop; | 1511 | return 1; |
1515 | 1512 | ||
1516 | skb->mac_header = skb->network_header; | 1513 | skb->mac_header = skb->network_header; |
1517 | skb_pull(skb, (u8*)encap - skb->data); | 1514 | skb_pull(skb, (u8*)encap - skb->data); |
@@ -1527,9 +1524,33 @@ int pim_rcv_v1(struct sk_buff * skb) | |||
1527 | nf_reset(skb); | 1524 | nf_reset(skb); |
1528 | netif_rx(skb); | 1525 | netif_rx(skb); |
1529 | dev_put(reg_dev); | 1526 | dev_put(reg_dev); |
1527 | |||
1530 | return 0; | 1528 | return 0; |
1531 | drop: | 1529 | } |
1532 | kfree_skb(skb); | 1530 | #endif |
1531 | |||
1532 | #ifdef CONFIG_IP_PIMSM_V1 | ||
1533 | /* | ||
1534 | * Handle IGMP messages of PIMv1 | ||
1535 | */ | ||
1536 | |||
1537 | int pim_rcv_v1(struct sk_buff * skb) | ||
1538 | { | ||
1539 | struct igmphdr *pim; | ||
1540 | |||
1541 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) | ||
1542 | goto drop; | ||
1543 | |||
1544 | pim = igmp_hdr(skb); | ||
1545 | |||
1546 | if (!mroute_do_pim || | ||
1547 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) | ||
1548 | goto drop; | ||
1549 | |||
1550 | if (__pim_rcv(skb, sizeof(*pim))) { | ||
1551 | drop: | ||
1552 | kfree_skb(skb); | ||
1553 | } | ||
1533 | return 0; | 1554 | return 0; |
1534 | } | 1555 | } |
1535 | #endif | 1556 | #endif |
@@ -1538,10 +1559,8 @@ int pim_rcv_v1(struct sk_buff * skb) | |||
1538 | static int pim_rcv(struct sk_buff * skb) | 1559 | static int pim_rcv(struct sk_buff * skb) |
1539 | { | 1560 | { |
1540 | struct pimreghdr *pim; | 1561 | struct pimreghdr *pim; |
1541 | struct iphdr *encap; | ||
1542 | struct net_device *reg_dev = NULL; | ||
1543 | 1562 | ||
1544 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) | 1563 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) |
1545 | goto drop; | 1564 | goto drop; |
1546 | 1565 | ||
1547 | pim = (struct pimreghdr *)skb_transport_header(skb); | 1566 | pim = (struct pimreghdr *)skb_transport_header(skb); |
@@ -1551,41 +1570,10 @@ static int pim_rcv(struct sk_buff * skb) | |||
1551 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) | 1570 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) |
1552 | goto drop; | 1571 | goto drop; |
1553 | 1572 | ||
1554 | /* check if the inner packet is destined to mcast group */ | 1573 | if (__pim_rcv(skb, sizeof(*pim))) { |
1555 | encap = (struct iphdr *)(skb_transport_header(skb) + | 1574 | drop: |
1556 | sizeof(struct pimreghdr)); | 1575 | kfree_skb(skb); |
1557 | if (!ipv4_is_multicast(encap->daddr) || | 1576 | } |
1558 | encap->tot_len == 0 || | ||
1559 | ntohs(encap->tot_len) + sizeof(*pim) > skb->len) | ||
1560 | goto drop; | ||
1561 | |||
1562 | read_lock(&mrt_lock); | ||
1563 | if (reg_vif_num >= 0) | ||
1564 | reg_dev = vif_table[reg_vif_num].dev; | ||
1565 | if (reg_dev) | ||
1566 | dev_hold(reg_dev); | ||
1567 | read_unlock(&mrt_lock); | ||
1568 | |||
1569 | if (reg_dev == NULL) | ||
1570 | goto drop; | ||
1571 | |||
1572 | skb->mac_header = skb->network_header; | ||
1573 | skb_pull(skb, (u8*)encap - skb->data); | ||
1574 | skb_reset_network_header(skb); | ||
1575 | skb->dev = reg_dev; | ||
1576 | skb->protocol = htons(ETH_P_IP); | ||
1577 | skb->ip_summed = 0; | ||
1578 | skb->pkt_type = PACKET_HOST; | ||
1579 | dst_release(skb->dst); | ||
1580 | reg_dev->stats.rx_bytes += skb->len; | ||
1581 | reg_dev->stats.rx_packets++; | ||
1582 | skb->dst = NULL; | ||
1583 | nf_reset(skb); | ||
1584 | netif_rx(skb); | ||
1585 | dev_put(reg_dev); | ||
1586 | return 0; | ||
1587 | drop: | ||
1588 | kfree_skb(skb); | ||
1589 | return 0; | 1577 | return 0; |
1590 | } | 1578 | } |
1591 | #endif | 1579 | #endif |
@@ -1602,13 +1590,13 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) | |||
1602 | if (dev) | 1590 | if (dev) |
1603 | RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); | 1591 | RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); |
1604 | 1592 | ||
1605 | mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0)); | 1593 | mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); |
1606 | 1594 | ||
1607 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { | 1595 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { |
1608 | if (c->mfc_un.res.ttls[ct] < 255) { | 1596 | if (c->mfc_un.res.ttls[ct] < 255) { |
1609 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) | 1597 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) |
1610 | goto rtattr_failure; | 1598 | goto rtattr_failure; |
1611 | nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); | 1599 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); |
1612 | nhp->rtnh_flags = 0; | 1600 | nhp->rtnh_flags = 0; |
1613 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; | 1601 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; |
1614 | nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; | 1602 | nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; |
@@ -1634,7 +1622,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) | |||
1634 | read_lock(&mrt_lock); | 1622 | read_lock(&mrt_lock); |
1635 | cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); | 1623 | cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); |
1636 | 1624 | ||
1637 | if (cache==NULL) { | 1625 | if (cache == NULL) { |
1638 | struct sk_buff *skb2; | 1626 | struct sk_buff *skb2; |
1639 | struct iphdr *iph; | 1627 | struct iphdr *iph; |
1640 | struct net_device *dev; | 1628 | struct net_device *dev; |
@@ -1866,15 +1854,16 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) | |||
1866 | const struct mfc_cache *mfc = v; | 1854 | const struct mfc_cache *mfc = v; |
1867 | const struct ipmr_mfc_iter *it = seq->private; | 1855 | const struct ipmr_mfc_iter *it = seq->private; |
1868 | 1856 | ||
1869 | seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld", | 1857 | seq_printf(seq, "%08lX %08lX %-3hd", |
1870 | (unsigned long) mfc->mfc_mcastgrp, | 1858 | (unsigned long) mfc->mfc_mcastgrp, |
1871 | (unsigned long) mfc->mfc_origin, | 1859 | (unsigned long) mfc->mfc_origin, |
1872 | mfc->mfc_parent, | 1860 | mfc->mfc_parent); |
1873 | mfc->mfc_un.res.pkt, | ||
1874 | mfc->mfc_un.res.bytes, | ||
1875 | mfc->mfc_un.res.wrong_if); | ||
1876 | 1861 | ||
1877 | if (it->cache != &mfc_unres_queue) { | 1862 | if (it->cache != &mfc_unres_queue) { |
1863 | seq_printf(seq, " %8lu %8lu %8lu", | ||
1864 | mfc->mfc_un.res.pkt, | ||
1865 | mfc->mfc_un.res.bytes, | ||
1866 | mfc->mfc_un.res.wrong_if); | ||
1878 | for (n = mfc->mfc_un.res.minvif; | 1867 | for (n = mfc->mfc_un.res.minvif; |
1879 | n < mfc->mfc_un.res.maxvif; n++ ) { | 1868 | n < mfc->mfc_un.res.maxvif; n++ ) { |
1880 | if (VIF_EXISTS(n) | 1869 | if (VIF_EXISTS(n) |
@@ -1883,6 +1872,11 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) | |||
1883 | " %2d:%-3d", | 1872 | " %2d:%-3d", |
1884 | n, mfc->mfc_un.res.ttls[n]); | 1873 | n, mfc->mfc_un.res.ttls[n]); |
1885 | } | 1874 | } |
1875 | } else { | ||
1876 | /* unresolved mfc_caches don't contain | ||
1877 | * pkt, bytes and wrong_if values | ||
1878 | */ | ||
1879 | seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); | ||
1886 | } | 1880 | } |
1887 | seq_putc(seq, '\n'); | 1881 | seq_putc(seq, '\n'); |
1888 | } | 1882 | } |
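
A pattern worth calling out from the ipmr.c changes above: the dvmrp tunnel ioctls now reach the driver through dev->netdev_ops, whose ndo_do_ioctl hook may legitimately be NULL, so both callers test the pointer and widen the address limit with set_fs(KERNEL_DS) because the handler expects a user-space buffer. Folded into one hedged helper (kernel_tunnel_ioctl is illustrative, not part of the patch):

    #include <linux/netdevice.h>
    #include <linux/uaccess.h>

    /* Sketch: invoke a device ioctl from kernel context with a kernel
     * buffer, as ipmr_new_tunnel()/ipmr_del_tunnel() now do. */
    static int kernel_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr,
    			       int cmd)
    {
    	const struct net_device_ops *ops = dev->netdev_ops;
    	mm_segment_t oldfs;
    	int err;

    	if (!ops->ndo_do_ioctl)
    		return -EOPNOTSUPP;

    	oldfs = get_fs();
    	set_fs(KERNEL_DS);		/* ifr points into kernel memory */
    	err = ops->ndo_do_ioctl(dev, ifr, cmd);
    	set_fs(oldfs);

    	return err;
    }
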
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6efdb70b3eb2..fdf6811c31a2 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -66,7 +66,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
66 | #ifdef CONFIG_XFRM | 66 | #ifdef CONFIG_XFRM |
67 | if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && | 67 | if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && |
68 | xfrm_decode_session(skb, &fl, AF_INET) == 0) | 68 | xfrm_decode_session(skb, &fl, AF_INET) == 0) |
69 | if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) | 69 | if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) |
70 | return -1; | 70 | return -1; |
71 | #endif | 71 | #endif |
72 | 72 | ||
@@ -97,7 +97,7 @@ int ip_xfrm_me_harder(struct sk_buff *skb) | |||
97 | dst = ((struct xfrm_dst *)dst)->route; | 97 | dst = ((struct xfrm_dst *)dst)->route; |
98 | dst_hold(dst); | 98 | dst_hold(dst); |
99 | 99 | ||
100 | if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0) | 100 | if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) |
101 | return -1; | 101 | return -1; |
102 | 102 | ||
103 | dst_release(skb->dst); | 103 | dst_release(skb->dst); |
@@ -125,6 +125,7 @@ struct ip_rt_info { | |||
125 | __be32 daddr; | 125 | __be32 daddr; |
126 | __be32 saddr; | 126 | __be32 saddr; |
127 | u_int8_t tos; | 127 | u_int8_t tos; |
128 | u_int32_t mark; | ||
128 | }; | 129 | }; |
129 | 130 | ||
130 | static void nf_ip_saveroute(const struct sk_buff *skb, | 131 | static void nf_ip_saveroute(const struct sk_buff *skb, |
@@ -138,6 +139,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, | |||
138 | rt_info->tos = iph->tos; | 139 | rt_info->tos = iph->tos; |
139 | rt_info->daddr = iph->daddr; | 140 | rt_info->daddr = iph->daddr; |
140 | rt_info->saddr = iph->saddr; | 141 | rt_info->saddr = iph->saddr; |
142 | rt_info->mark = skb->mark; | ||
141 | } | 143 | } |
142 | } | 144 | } |
143 | 145 | ||
@@ -150,6 +152,7 @@ static int nf_ip_reroute(struct sk_buff *skb, | |||
150 | const struct iphdr *iph = ip_hdr(skb); | 152 | const struct iphdr *iph = ip_hdr(skb); |
151 | 153 | ||
152 | if (!(iph->tos == rt_info->tos | 154 | if (!(iph->tos == rt_info->tos |
155 | && skb->mark == rt_info->mark | ||
153 | && iph->daddr == rt_info->daddr | 156 | && iph->daddr == rt_info->daddr |
154 | && iph->saddr == rt_info->saddr)) | 157 | && iph->saddr == rt_info->saddr)) |
155 | return ip_route_me_harder(skb, RTN_UNSPEC); | 158 | return ip_route_me_harder(skb, RTN_UNSPEC); |
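
The netfilter.c change records skb->mark alongside tos/saddr/daddr when a packet is saved, so that nf_ip_reroute() re-runs routing when a target such as MARK rewrites the mark while the other route keys stay untouched (xfrm_lookup() also grows a struct net argument here). The reroute condition, restated as a hedged predicate (route_keys_changed is illustrative; ip_rt_info is the file-local struct shown above):

    #include <linux/ip.h>
    #include <linux/skbuff.h>

    static bool route_keys_changed(const struct sk_buff *skb,
    			       const struct ip_rt_info *rt_info)
    {
    	const struct iphdr *iph = ip_hdr(skb);

    	/* any change to these keys can change the routing decision */
    	return iph->tos   != rt_info->tos   ||
    	       skb->mark  != rt_info->mark  ||
    	       iph->daddr != rt_info->daddr ||
    	       iph->saddr != rt_info->saddr;
    }
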
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8d70d29f1ccf..7ea88b61cb0d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -142,15 +142,15 @@ static inline int arp_packet_match(const struct arphdr *arphdr, | |||
142 | ARPT_INV_TGTIP)) { | 142 | ARPT_INV_TGTIP)) { |
143 | dprintf("Source or target IP address mismatch.\n"); | 143 | dprintf("Source or target IP address mismatch.\n"); |
144 | 144 | ||
145 | dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n", | 145 | dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", |
146 | NIPQUAD(src_ipaddr), | 146 | &src_ipaddr, |
147 | NIPQUAD(arpinfo->smsk.s_addr), | 147 | &arpinfo->smsk.s_addr, |
148 | NIPQUAD(arpinfo->src.s_addr), | 148 | &arpinfo->src.s_addr, |
149 | arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : ""); | 149 | arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : ""); |
150 | dprintf("TGT: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n", | 150 | dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n", |
151 | NIPQUAD(tgt_ipaddr), | 151 | &tgt_ipaddr, |
152 | NIPQUAD(arpinfo->tmsk.s_addr), | 152 | &arpinfo->tmsk.s_addr, |
153 | NIPQUAD(arpinfo->tgt.s_addr), | 153 | &arpinfo->tgt.s_addr, |
154 | arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : ""); | 154 | arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : ""); |
155 | return 0; | 155 | return 0; |
156 | } | 156 | } |
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index bee3d117661a..e091187e864f 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c | |||
@@ -75,16 +75,6 @@ static unsigned int arpt_out_hook(unsigned int hook, | |||
75 | dev_net(out)->ipv4.arptable_filter); | 75 | dev_net(out)->ipv4.arptable_filter); |
76 | } | 76 | } |
77 | 77 | ||
78 | static unsigned int arpt_forward_hook(unsigned int hook, | ||
79 | struct sk_buff *skb, | ||
80 | const struct net_device *in, | ||
81 | const struct net_device *out, | ||
82 | int (*okfn)(struct sk_buff *)) | ||
83 | { | ||
84 | return arpt_do_table(skb, hook, in, out, | ||
85 | dev_net(in)->ipv4.arptable_filter); | ||
86 | } | ||
87 | |||
88 | static struct nf_hook_ops arpt_ops[] __read_mostly = { | 78 | static struct nf_hook_ops arpt_ops[] __read_mostly = { |
89 | { | 79 | { |
90 | .hook = arpt_in_hook, | 80 | .hook = arpt_in_hook, |
@@ -101,7 +91,7 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { | |||
101 | .priority = NF_IP_PRI_FILTER, | 91 | .priority = NF_IP_PRI_FILTER, |
102 | }, | 92 | }, |
103 | { | 93 | { |
104 | .hook = arpt_forward_hook, | 94 | .hook = arpt_in_hook, |
105 | .owner = THIS_MODULE, | 95 | .owner = THIS_MODULE, |
106 | .pf = NFPROTO_ARP, | 96 | .pf = NFPROTO_ARP, |
107 | .hooknum = NF_ARP_FORWARD, | 97 | .hooknum = NF_ARP_FORWARD, |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 213fb27debc1..ef8b6ca068b2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -94,15 +94,11 @@ ip_packet_match(const struct iphdr *ip, | |||
94 | IPT_INV_DSTIP)) { | 94 | IPT_INV_DSTIP)) { |
95 | dprintf("Source or dest mismatch.\n"); | 95 | dprintf("Source or dest mismatch.\n"); |
96 | 96 | ||
97 | dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n", | 97 | dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", |
98 | NIPQUAD(ip->saddr), | 98 | &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr, |
99 | NIPQUAD(ipinfo->smsk.s_addr), | ||
100 | NIPQUAD(ipinfo->src.s_addr), | ||
101 | ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : ""); | 99 | ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : ""); |
102 | dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n", | 100 | dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n", |
103 | NIPQUAD(ip->daddr), | 101 | &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr, |
104 | NIPQUAD(ipinfo->dmsk.s_addr), | ||
105 | NIPQUAD(ipinfo->dst.s_addr), | ||
106 | ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); | 102 | ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); |
107 | return false; | 103 | return false; |
108 | } | 104 | } |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7ac1677419a9..2e4f98b85524 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -168,7 +168,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, | |||
168 | char buffer[16]; | 168 | char buffer[16]; |
169 | 169 | ||
170 | /* create proc dir entry */ | 170 | /* create proc dir entry */ |
171 | sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); | 171 | sprintf(buffer, "%pI4", &ip); |
172 | c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, | 172 | c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, |
173 | clusterip_procdir, | 173 | clusterip_procdir, |
174 | &clusterip_proc_fops, c); | 174 | &clusterip_proc_fops, c); |
@@ -373,7 +373,7 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) | |||
373 | config = clusterip_config_find_get(e->ip.dst.s_addr, 1); | 373 | config = clusterip_config_find_get(e->ip.dst.s_addr, 1); |
374 | if (!config) { | 374 | if (!config) { |
375 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { | 375 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { |
376 | printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); | 376 | printk(KERN_WARNING "CLUSTERIP: no config found for %pI4, need 'new'\n", &e->ip.dst.s_addr); |
377 | return false; | 377 | return false; |
378 | } else { | 378 | } else { |
379 | struct net_device *dev; | 379 | struct net_device *dev; |
@@ -478,9 +478,8 @@ static void arp_print(struct arp_payload *payload) | |||
478 | } | 478 | } |
479 | hbuffer[--k]='\0'; | 479 | hbuffer[--k]='\0'; |
480 | 480 | ||
481 | printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n", | 481 | printk("src %pI4@%s, dst %pI4\n", |
482 | NIPQUAD(payload->src_ip), hbuffer, | 482 | &payload->src_ip, hbuffer, &payload->dst_ip); |
483 | NIPQUAD(payload->dst_ip)); | ||
484 | } | 483 | } |
485 | #endif | 484 | #endif |
486 | 485 | ||
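[Editor's note] The CLUSTERIP conversion keeps the existing char buffer[16], which remains sufficient: %pI4 emits at most the 15 characters of "255.255.255.255" plus the terminating NUL, the same worst case as the old NIPQUAD output (which is also why the "Max length" comments in the ipt_LOG hunk below stay accurate). A userspace worked check of the bound:

    #include <stdio.h>

    int main(void)
    {
            /* sizeof a string literal includes the trailing NUL, so
             * the widest dotted quad needs exactly 16 bytes. */
            printf("%zu\n", sizeof("255.255.255.255")); /* prints 16 */
            return 0;
    }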
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 7b5dbe118c09..27a78fbbd92b 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -54,8 +54,8 @@ static void dump_packet(const struct nf_loginfo *info, | |||
54 | /* Important fields: | 54 | /* Important fields: |
55 | * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ | 55 | * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ |
56 | /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ | 56 | /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ |
57 | printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ", | 57 | printk("SRC=%pI4 DST=%pI4 ", |
58 | NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); | 58 | &ih->saddr, &ih->daddr); |
59 | 59 | ||
60 | /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ | 60 | /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ |
61 | printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", | 61 | printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", |
@@ -262,8 +262,7 @@ static void dump_packet(const struct nf_loginfo *info, | |||
262 | break; | 262 | break; |
263 | case ICMP_REDIRECT: | 263 | case ICMP_REDIRECT: |
264 | /* Max length: 24 "GATEWAY=255.255.255.255 " */ | 264 | /* Max length: 24 "GATEWAY=255.255.255.255 " */ |
265 | printk("GATEWAY=%u.%u.%u.%u ", | 265 | printk("GATEWAY=%pI4 ", &ich->un.gateway); |
266 | NIPQUAD(ich->un.gateway)); | ||
267 | /* Fall through */ | 266 | /* Fall through */ |
268 | case ICMP_DEST_UNREACH: | 267 | case ICMP_DEST_UNREACH: |
269 | case ICMP_SOURCE_QUENCH: | 268 | case ICMP_SOURCE_QUENCH: |
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 88762f02779d..3b216be3bc9f 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c | |||
@@ -23,24 +23,25 @@ MODULE_LICENSE("GPL"); | |||
23 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | 23 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); |
24 | MODULE_DESCRIPTION("Xtables: address type match for IPv4"); | 24 | MODULE_DESCRIPTION("Xtables: address type match for IPv4"); |
25 | 25 | ||
26 | static inline bool match_type(const struct net_device *dev, __be32 addr, | 26 | static inline bool match_type(struct net *net, const struct net_device *dev, |
27 | u_int16_t mask) | 27 | __be32 addr, u_int16_t mask) |
28 | { | 28 | { |
29 | return !!(mask & (1 << inet_dev_addr_type(&init_net, dev, addr))); | 29 | return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); |
30 | } | 30 | } |
31 | 31 | ||
32 | static bool | 32 | static bool |
33 | addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) | 33 | addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) |
34 | { | 34 | { |
35 | struct net *net = dev_net(par->in ? par->in : par->out); | ||
35 | const struct ipt_addrtype_info *info = par->matchinfo; | 36 | const struct ipt_addrtype_info *info = par->matchinfo; |
36 | const struct iphdr *iph = ip_hdr(skb); | 37 | const struct iphdr *iph = ip_hdr(skb); |
37 | bool ret = true; | 38 | bool ret = true; |
38 | 39 | ||
39 | if (info->source) | 40 | if (info->source) |
40 | ret &= match_type(NULL, iph->saddr, info->source) ^ | 41 | ret &= match_type(net, NULL, iph->saddr, info->source) ^ |
41 | info->invert_source; | 42 | info->invert_source; |
42 | if (info->dest) | 43 | if (info->dest) |
43 | ret &= match_type(NULL, iph->daddr, info->dest) ^ | 44 | ret &= match_type(net, NULL, iph->daddr, info->dest) ^ |
44 | info->invert_dest; | 45 | info->invert_dest; |
45 | 46 | ||
46 | return ret; | 47 | return ret; |
@@ -49,6 +50,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) | |||
49 | static bool | 50 | static bool |
50 | addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) | 51 | addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) |
51 | { | 52 | { |
53 | struct net *net = dev_net(par->in ? par->in : par->out); | ||
52 | const struct ipt_addrtype_info_v1 *info = par->matchinfo; | 54 | const struct ipt_addrtype_info_v1 *info = par->matchinfo; |
53 | const struct iphdr *iph = ip_hdr(skb); | 55 | const struct iphdr *iph = ip_hdr(skb); |
54 | const struct net_device *dev = NULL; | 56 | const struct net_device *dev = NULL; |
@@ -60,10 +62,10 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) | |||
60 | dev = par->out; | 62 | dev = par->out; |
61 | 63 | ||
62 | if (info->source) | 64 | if (info->source) |
63 | ret &= match_type(dev, iph->saddr, info->source) ^ | 65 | ret &= match_type(net, dev, iph->saddr, info->source) ^ |
64 | (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); | 66 | (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); |
65 | if (ret && info->dest) | 67 | if (ret && info->dest) |
66 | ret &= match_type(dev, iph->daddr, info->dest) ^ | 68 | ret &= match_type(net, dev, iph->daddr, info->dest) ^ |
67 | !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); | 69 | !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); |
68 | return ret; | 70 | return ret; |
69 | } | 71 | } |
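[Editor's note] The addrtype hunks make the match namespace-aware: instead of hard-coding &init_net, the namespace is derived from whichever device the hook supplies. For the hooks this match runs in, at least one of par->in and par->out is non-NULL, which the ternary relies on. A hedged sketch of the lookup in isolation:

    /* Sketch: derive the packet's netns from the hook devices; the
     * assumption is that one of the two pointers is always set here. */
    static struct net *param_net(const struct xt_match_param *par)
    {
            return dev_net(par->in ? par->in : par->out);
    }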
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 4a7c35275396..b2141e11575e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -60,9 +60,8 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, | |||
60 | static int ipv4_print_tuple(struct seq_file *s, | 60 | static int ipv4_print_tuple(struct seq_file *s, |
61 | const struct nf_conntrack_tuple *tuple) | 61 | const struct nf_conntrack_tuple *tuple) |
62 | { | 62 | { |
63 | return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", | 63 | return seq_printf(s, "src=%pI4 dst=%pI4 ", |
64 | NIPQUAD(tuple->src.u3.ip), | 64 | &tuple->src.u3.ip, &tuple->dst.u3.ip); |
65 | NIPQUAD(tuple->dst.u3.ip)); | ||
66 | } | 65 | } |
67 | 66 | ||
68 | static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, | 67 | static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, |
@@ -198,7 +197,7 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
198 | .data = &nf_conntrack_max, | 197 | .data = &nf_conntrack_max, |
199 | .maxlen = sizeof(int), | 198 | .maxlen = sizeof(int), |
200 | .mode = 0644, | 199 | .mode = 0644, |
201 | .proc_handler = &proc_dointvec, | 200 | .proc_handler = proc_dointvec, |
202 | }, | 201 | }, |
203 | { | 202 | { |
204 | .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, | 203 | .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, |
@@ -206,7 +205,7 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
206 | .data = &init_net.ct.count, | 205 | .data = &init_net.ct.count, |
207 | .maxlen = sizeof(int), | 206 | .maxlen = sizeof(int), |
208 | .mode = 0444, | 207 | .mode = 0444, |
209 | .proc_handler = &proc_dointvec, | 208 | .proc_handler = proc_dointvec, |
210 | }, | 209 | }, |
211 | { | 210 | { |
212 | .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, | 211 | .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, |
@@ -214,7 +213,7 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
214 | .data = &nf_conntrack_htable_size, | 213 | .data = &nf_conntrack_htable_size, |
215 | .maxlen = sizeof(unsigned int), | 214 | .maxlen = sizeof(unsigned int), |
216 | .mode = 0444, | 215 | .mode = 0444, |
217 | .proc_handler = &proc_dointvec, | 216 | .proc_handler = proc_dointvec, |
218 | }, | 217 | }, |
219 | { | 218 | { |
220 | .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, | 219 | .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, |
@@ -222,7 +221,7 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
222 | .data = &init_net.ct.sysctl_checksum, | 221 | .data = &init_net.ct.sysctl_checksum, |
223 | .maxlen = sizeof(int), | 222 | .maxlen = sizeof(int), |
224 | .mode = 0644, | 223 | .mode = 0644, |
225 | .proc_handler = &proc_dointvec, | 224 | .proc_handler = proc_dointvec, |
226 | }, | 225 | }, |
227 | { | 226 | { |
228 | .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, | 227 | .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, |
@@ -230,8 +229,8 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
230 | .data = &init_net.ct.sysctl_log_invalid, | 229 | .data = &init_net.ct.sysctl_log_invalid, |
231 | .maxlen = sizeof(unsigned int), | 230 | .maxlen = sizeof(unsigned int), |
232 | .mode = 0644, | 231 | .mode = 0644, |
233 | .proc_handler = &proc_dointvec_minmax, | 232 | .proc_handler = proc_dointvec_minmax, |
234 | .strategy = &sysctl_intvec, | 233 | .strategy = sysctl_intvec, |
235 | .extra1 = &log_invalid_proto_min, | 234 | .extra1 = &log_invalid_proto_min, |
236 | .extra2 = &log_invalid_proto_max, | 235 | .extra2 = &log_invalid_proto_max, |
237 | }, | 236 | }, |
@@ -284,17 +283,17 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) | |||
284 | .tuple.dst.u3.ip; | 283 | .tuple.dst.u3.ip; |
285 | memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); | 284 | memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); |
286 | 285 | ||
287 | pr_debug("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", | 286 | pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", |
288 | NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); | 287 | &sin.sin_addr.s_addr, ntohs(sin.sin_port)); |
289 | nf_ct_put(ct); | 288 | nf_ct_put(ct); |
290 | if (copy_to_user(user, &sin, sizeof(sin)) != 0) | 289 | if (copy_to_user(user, &sin, sizeof(sin)) != 0) |
291 | return -EFAULT; | 290 | return -EFAULT; |
292 | else | 291 | else |
293 | return 0; | 292 | return 0; |
294 | } | 293 | } |
295 | pr_debug("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", | 294 | pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", |
296 | NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), | 295 | &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), |
297 | NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); | 296 | &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); |
298 | return -ENOENT; | 297 | return -ENOENT; |
299 | } | 298 | } |
300 | 299 | ||
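[Editor's note] The sysctl hunks here and in route.c below drop the redundant address-of operator on handler assignments: in C a function designator decays to a pointer to the function, so proc_dointvec and &proc_dointvec denote the same value, and the shorter form is the convention being standardized on. A standalone illustration:

    #include <assert.h>
    #include <stdio.h>

    static int handler(void) { return 0; }

    int main(void)
    {
            /* The function name decays to a function pointer, so both
             * initializers produce the same pointer value. */
            int (*p1)(void) = handler;
            int (*p2)(void) = &handler;

            assert(p1 == p2);
            puts("equivalent");
            return 0;
    }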
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 4e8879220222..1fd3ef7718b6 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -272,7 +272,7 @@ static struct ctl_table icmp_sysctl_table[] = { | |||
272 | .data = &nf_ct_icmp_timeout, | 272 | .data = &nf_ct_icmp_timeout, |
273 | .maxlen = sizeof(unsigned int), | 273 | .maxlen = sizeof(unsigned int), |
274 | .mode = 0644, | 274 | .mode = 0644, |
275 | .proc_handler = &proc_dointvec_jiffies, | 275 | .proc_handler = proc_dointvec_jiffies, |
276 | }, | 276 | }, |
277 | { | 277 | { |
278 | .ctl_name = 0 | 278 | .ctl_name = 0 |
@@ -285,7 +285,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { | |||
285 | .data = &nf_ct_icmp_timeout, | 285 | .data = &nf_ct_icmp_timeout, |
286 | .maxlen = sizeof(unsigned int), | 286 | .maxlen = sizeof(unsigned int), |
287 | .mode = 0644, | 287 | .mode = 0644, |
288 | .proc_handler = &proc_dointvec_jiffies, | 288 | .proc_handler = proc_dointvec_jiffies, |
289 | }, | 289 | }, |
290 | { | 290 | { |
291 | .ctl_name = 0 | 291 | .ctl_name = 0 |
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index ee47bf28c825..7e8e6fc75413 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c | |||
@@ -119,10 +119,9 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, | |||
119 | (ntohl(addr.ip) & 0xff000000) == 0x7f000000) | 119 | (ntohl(addr.ip) & 0xff000000) == 0x7f000000) |
120 | i = 0; | 120 | i = 0; |
121 | 121 | ||
122 | pr_debug("nf_nat_ras: set signal address " | 122 | pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", |
123 | "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | 123 | &addr.ip, port, |
124 | NIPQUAD(addr.ip), port, | 124 | &ct->tuplehash[!dir].tuple.dst.u3.ip, |
125 | NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), | ||
126 | info->sig_port[!dir]); | 125 | info->sig_port[!dir]); |
127 | return set_h225_addr(skb, data, 0, &taddr[i], | 126 | return set_h225_addr(skb, data, 0, &taddr[i], |
128 | &ct->tuplehash[!dir]. | 127 | &ct->tuplehash[!dir]. |
@@ -131,10 +130,9 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, | |||
131 | } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && | 130 | } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && |
132 | port == info->sig_port[dir]) { | 131 | port == info->sig_port[dir]) { |
133 | /* GK->GW */ | 132 | /* GK->GW */ |
134 | pr_debug("nf_nat_ras: set signal address " | 133 | pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", |
135 | "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | 134 | &addr.ip, port, |
136 | NIPQUAD(addr.ip), port, | 135 | &ct->tuplehash[!dir].tuple.src.u3.ip, |
137 | NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip), | ||
138 | info->sig_port[!dir]); | 136 | info->sig_port[!dir]); |
139 | return set_h225_addr(skb, data, 0, &taddr[i], | 137 | return set_h225_addr(skb, data, 0, &taddr[i], |
140 | &ct->tuplehash[!dir]. | 138 | &ct->tuplehash[!dir]. |
@@ -162,10 +160,9 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, | |||
162 | if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && | 160 | if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && |
163 | addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && | 161 | addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && |
164 | port == ct->tuplehash[dir].tuple.src.u.udp.port) { | 162 | port == ct->tuplehash[dir].tuple.src.u.udp.port) { |
165 | pr_debug("nf_nat_ras: set rasAddress " | 163 | pr_debug("nf_nat_ras: set rasAddress %pI4:%hu->%pI4:%hu\n", |
166 | "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | 164 | &addr.ip, ntohs(port), |
167 | NIPQUAD(addr.ip), ntohs(port), | 165 | &ct->tuplehash[!dir].tuple.dst.u3.ip, |
168 | NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), | ||
169 | ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); | 166 | ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); |
170 | return set_h225_addr(skb, data, 0, &taddr[i], | 167 | return set_h225_addr(skb, data, 0, &taddr[i], |
171 | &ct->tuplehash[!dir].tuple.dst.u3, | 168 | &ct->tuplehash[!dir].tuple.dst.u3, |
@@ -257,15 +254,15 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
257 | } | 254 | } |
258 | 255 | ||
259 | /* Success */ | 256 | /* Success */ |
260 | pr_debug("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | 257 | pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n", |
261 | NIPQUAD(rtp_exp->tuple.src.u3.ip), | 258 | &rtp_exp->tuple.src.u3.ip, |
262 | ntohs(rtp_exp->tuple.src.u.udp.port), | 259 | ntohs(rtp_exp->tuple.src.u.udp.port), |
263 | NIPQUAD(rtp_exp->tuple.dst.u3.ip), | 260 | &rtp_exp->tuple.dst.u3.ip, |
264 | ntohs(rtp_exp->tuple.dst.u.udp.port)); | 261 | ntohs(rtp_exp->tuple.dst.u.udp.port)); |
265 | pr_debug("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | 262 | pr_debug("nf_nat_h323: expect RTCP %pI4:%hu->%pI4:%hu\n", |
266 | NIPQUAD(rtcp_exp->tuple.src.u3.ip), | 263 | &rtcp_exp->tuple.src.u3.ip, |
267 | ntohs(rtcp_exp->tuple.src.u.udp.port), | 264 | ntohs(rtcp_exp->tuple.src.u.udp.port), |
268 | NIPQUAD(rtcp_exp->tuple.dst.u3.ip), | 265 | &rtcp_exp->tuple.dst.u3.ip, |
269 | ntohs(rtcp_exp->tuple.dst.u.udp.port)); | 266 | ntohs(rtcp_exp->tuple.dst.u.udp.port)); |
270 | 267 | ||
271 | return 0; | 268 | return 0; |
@@ -307,10 +304,10 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, | |||
307 | return -1; | 304 | return -1; |
308 | } | 305 | } |
309 | 306 | ||
310 | pr_debug("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | 307 | pr_debug("nf_nat_h323: expect T.120 %pI4:%hu->%pI4:%hu\n", |
311 | NIPQUAD(exp->tuple.src.u3.ip), | 308 | &exp->tuple.src.u3.ip, |
312 | ntohs(exp->tuple.src.u.tcp.port), | 309 | ntohs(exp->tuple.src.u.tcp.port), |
313 | NIPQUAD(exp->tuple.dst.u3.ip), | 310 | &exp->tuple.dst.u3.ip, |
314 | ntohs(exp->tuple.dst.u.tcp.port)); | 311 | ntohs(exp->tuple.dst.u.tcp.port)); |
315 | 312 | ||
316 | return 0; | 313 | return 0; |
@@ -361,10 +358,10 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, | |||
361 | return -1; | 358 | return -1; |
362 | } | 359 | } |
363 | 360 | ||
364 | pr_debug("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | 361 | pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n", |
365 | NIPQUAD(exp->tuple.src.u3.ip), | 362 | &exp->tuple.src.u3.ip, |
366 | ntohs(exp->tuple.src.u.tcp.port), | 363 | ntohs(exp->tuple.src.u.tcp.port), |
367 | NIPQUAD(exp->tuple.dst.u3.ip), | 364 | &exp->tuple.dst.u3.ip, |
368 | ntohs(exp->tuple.dst.u.tcp.port)); | 365 | ntohs(exp->tuple.dst.u.tcp.port)); |
369 | 366 | ||
370 | return 0; | 367 | return 0; |
@@ -455,10 +452,10 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, | |||
455 | } | 452 | } |
456 | 453 | ||
457 | /* Success */ | 454 | /* Success */ |
458 | pr_debug("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | 455 | pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n", |
459 | NIPQUAD(exp->tuple.src.u3.ip), | 456 | &exp->tuple.src.u3.ip, |
460 | ntohs(exp->tuple.src.u.tcp.port), | 457 | ntohs(exp->tuple.src.u.tcp.port), |
461 | NIPQUAD(exp->tuple.dst.u3.ip), | 458 | &exp->tuple.dst.u3.ip, |
462 | ntohs(exp->tuple.dst.u.tcp.port)); | 459 | ntohs(exp->tuple.dst.u.tcp.port)); |
463 | 460 | ||
464 | return 0; | 461 | return 0; |
@@ -524,11 +521,10 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, | |||
524 | } | 521 | } |
525 | 522 | ||
526 | /* Success */ | 523 | /* Success */ |
527 | pr_debug("nf_nat_q931: expect Call Forwarding " | 524 | pr_debug("nf_nat_q931: expect Call Forwarding %pI4:%hu->%pI4:%hu\n", |
528 | "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | 525 | &exp->tuple.src.u3.ip, |
529 | NIPQUAD(exp->tuple.src.u3.ip), | ||
530 | ntohs(exp->tuple.src.u.tcp.port), | 526 | ntohs(exp->tuple.src.u.tcp.port), |
531 | NIPQUAD(exp->tuple.dst.u3.ip), | 527 | &exp->tuple.dst.u3.ip, |
532 | ntohs(exp->tuple.dst.u.tcp.port)); | 528 | ntohs(exp->tuple.dst.u.tcp.port)); |
533 | 529 | ||
534 | return 0; | 530 | return 0; |
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index fe6f9cef6c85..ea83a886b03e 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c | |||
@@ -55,8 +55,8 @@ static unsigned int help(struct sk_buff *skb, | |||
55 | 55 | ||
56 | ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip); | 56 | ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip); |
57 | sprintf(buffer, "%u %u", ip, port); | 57 | sprintf(buffer, "%u %u", ip, port); |
58 | pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", | 58 | pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", |
59 | buffer, NIPQUAD(ip), port); | 59 | buffer, &ip, port); |
60 | 60 | ||
61 | ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, | 61 | ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, |
62 | matchoff, matchlen, buffer, | 62 | matchoff, matchlen, buffer, |
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 8d489e746b21..a7eb04719044 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c | |||
@@ -86,25 +86,6 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) | |||
86 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); | 86 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); |
87 | } | 87 | } |
88 | 88 | ||
89 | /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ | ||
90 | static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip) | ||
91 | { | ||
92 | static int warned = 0; | ||
93 | struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; | ||
94 | struct rtable *rt; | ||
95 | |||
96 | if (ip_route_output_key(net, &rt, &fl) != 0) | ||
97 | return; | ||
98 | |||
99 | if (rt->rt_src != srcip && !warned) { | ||
100 | printk("NAT: no longer support implicit source local NAT\n"); | ||
101 | printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n", | ||
102 | NIPQUAD(srcip), NIPQUAD(dstip)); | ||
103 | warned = 1; | ||
104 | } | ||
105 | ip_rt_put(rt); | ||
106 | } | ||
107 | |||
108 | static unsigned int | 89 | static unsigned int |
109 | ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) | 90 | ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) |
110 | { | 91 | { |
@@ -120,11 +101,6 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) | |||
120 | /* Connection must be valid and new. */ | 101 | /* Connection must be valid and new. */ |
121 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); | 102 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); |
122 | 103 | ||
123 | if (par->hooknum == NF_INET_LOCAL_OUT && | ||
124 | mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) | ||
125 | warn_if_extra_mangle(dev_net(par->out), ip_hdr(skb)->daddr, | ||
126 | mr->range[0].min_ip); | ||
127 | |||
128 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); | 104 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); |
129 | } | 105 | } |
130 | 106 | ||
@@ -166,8 +142,7 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) | |||
166 | struct nf_nat_range range | 142 | struct nf_nat_range range |
167 | = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } }; | 143 | = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } }; |
168 | 144 | ||
169 | pr_debug("Allocating NULL binding for %p (%u.%u.%u.%u)\n", | 145 | pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, &ip); |
170 | ct, NIPQUAD(ip)); | ||
171 | return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); | 146 | return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); |
172 | } | 147 | } |
173 | 148 | ||
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 14544320c545..07d61a57613c 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c | |||
@@ -74,8 +74,7 @@ static int map_addr(struct sk_buff *skb, | |||
74 | if (newaddr == addr->ip && newport == port) | 74 | if (newaddr == addr->ip && newport == port) |
75 | return 1; | 75 | return 1; |
76 | 76 | ||
77 | buflen = sprintf(buffer, "%u.%u.%u.%u:%u", | 77 | buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); |
78 | NIPQUAD(newaddr), ntohs(newport)); | ||
79 | 78 | ||
80 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 79 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, |
81 | buffer, buflen); | 80 | buffer, buflen); |
@@ -152,8 +151,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
152 | &addr) > 0 && | 151 | &addr) > 0 && |
153 | addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && | 152 | addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && |
154 | addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { | 153 | addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { |
155 | __be32 ip = ct->tuplehash[!dir].tuple.dst.u3.ip; | 154 | buflen = sprintf(buffer, "%pI4", |
156 | buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); | 155 | &ct->tuplehash[!dir].tuple.dst.u3.ip); |
157 | if (!mangle_packet(skb, dptr, datalen, poff, plen, | 156 | if (!mangle_packet(skb, dptr, datalen, poff, plen, |
158 | buffer, buflen)) | 157 | buffer, buflen)) |
159 | return NF_DROP; | 158 | return NF_DROP; |
@@ -166,8 +165,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, | |||
166 | &addr) > 0 && | 165 | &addr) > 0 && |
167 | addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && | 166 | addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && |
168 | addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { | 167 | addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { |
169 | __be32 ip = ct->tuplehash[!dir].tuple.src.u3.ip; | 168 | buflen = sprintf(buffer, "%pI4", |
170 | buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); | 169 | &ct->tuplehash[!dir].tuple.src.u3.ip); |
171 | if (!mangle_packet(skb, dptr, datalen, poff, plen, | 170 | if (!mangle_packet(skb, dptr, datalen, poff, plen, |
172 | buffer, buflen)) | 171 | buffer, buflen)) |
173 | return NF_DROP; | 172 | return NF_DROP; |
@@ -279,8 +278,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, | |||
279 | 278 | ||
280 | if (exp->tuple.dst.u3.ip != exp->saved_ip || | 279 | if (exp->tuple.dst.u3.ip != exp->saved_ip || |
281 | exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { | 280 | exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { |
282 | buflen = sprintf(buffer, "%u.%u.%u.%u:%u", | 281 | buflen = sprintf(buffer, "%pI4:%u", &newip, port); |
283 | NIPQUAD(newip), port); | ||
284 | if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, | 282 | if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, |
285 | buffer, buflen)) | 283 | buffer, buflen)) |
286 | goto err; | 284 | goto err; |
@@ -345,7 +343,7 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, | |||
345 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; | 343 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; |
346 | unsigned int buflen; | 344 | unsigned int buflen; |
347 | 345 | ||
348 | buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip)); | 346 | buflen = sprintf(buffer, "%pI4", &addr->ip); |
349 | if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, | 347 | if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, |
350 | buffer, buflen)) | 348 | buffer, buflen)) |
351 | return 0; | 349 | return 0; |
@@ -380,7 +378,7 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, | |||
380 | unsigned int buflen; | 378 | unsigned int buflen; |
381 | 379 | ||
382 | /* Mangle session description owner and contact addresses */ | 380 | /* Mangle session description owner and contact addresses */ |
383 | buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip)); | 381 | buflen = sprintf(buffer, "%pI4", &addr->ip); |
384 | if (mangle_sdp_packet(skb, dptr, dataoff, datalen, | 382 | if (mangle_sdp_packet(skb, dptr, dataoff, datalen, |
385 | SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, | 383 | SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, |
386 | buffer, buflen)) | 384 | buffer, buflen)) |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 8303e4b406c0..182f845de92f 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -930,8 +930,8 @@ static inline void mangle_address(unsigned char *begin, | |||
930 | } | 930 | } |
931 | 931 | ||
932 | if (debug) | 932 | if (debug) |
933 | printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to " | 933 | printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n", |
934 | "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr)); | 934 | &old, addr); |
935 | } | 935 | } |
936 | } | 936 | } |
937 | 937 | ||
@@ -1267,9 +1267,8 @@ static int help(struct sk_buff *skb, unsigned int protoff, | |||
1267 | */ | 1267 | */ |
1268 | if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { | 1268 | if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { |
1269 | if (net_ratelimit()) | 1269 | if (net_ratelimit()) |
1270 | printk(KERN_WARNING "SNMP: dropping malformed packet " | 1270 | printk(KERN_WARNING "SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", |
1271 | "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n", | 1271 | &iph->saddr, &iph->daddr); |
1272 | NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); | ||
1273 | return NF_DROP; | 1272 | return NF_DROP; |
1274 | } | 1273 | } |
1275 | 1274 | ||
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a631a1f110ca..614958b7c276 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -54,8 +54,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
54 | socket_seq_show(seq); | 54 | socket_seq_show(seq); |
55 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", | 55 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", |
56 | sock_prot_inuse_get(net, &tcp_prot), | 56 | sock_prot_inuse_get(net, &tcp_prot), |
57 | atomic_read(&tcp_orphan_count), | 57 | (int)percpu_counter_sum_positive(&tcp_orphan_count), |
58 | tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), | 58 | tcp_death_row.tw_count, |
59 | (int)percpu_counter_sum_positive(&tcp_sockets_allocated), | ||
59 | atomic_read(&tcp_memory_allocated)); | 60 | atomic_read(&tcp_memory_allocated)); |
60 | seq_printf(seq, "UDP: inuse %d mem %d\n", | 61 | seq_printf(seq, "UDP: inuse %d mem %d\n", |
61 | sock_prot_inuse_get(net, &udp_prot), | 62 | sock_prot_inuse_get(net, &udp_prot), |
@@ -234,6 +235,9 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
234 | SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), | 235 | SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), |
235 | SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), | 236 | SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), |
236 | SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), | 237 | SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), |
238 | SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), | ||
239 | SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), | ||
240 | SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), | ||
237 | SNMP_MIB_SENTINEL | 241 | SNMP_MIB_SENTINEL |
238 | }; | 242 | }; |
239 | 243 | ||
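[Editor's note] The proc.c hunk tracks the conversion of tcp_orphan_count and tcp_sockets_allocated from atomic_t to struct percpu_counter: writers touch only a cheap per-CPU delta, and the rare reader folds the deltas with percpu_counter_sum_positive(), which also clamps a transiently negative sum to zero. A hedged kernel-context sketch of the pattern (counter and function names illustrative; the counter must first be set up with percpu_counter_init()):

    #include <linux/percpu_counter.h>

    static struct percpu_counter example_count;

    /* Hot path: bump a per-CPU delta, no shared cacheline bouncing. */
    static void example_inc(void)
    {
            percpu_counter_inc(&example_count);
    }

    /* Reporting path: sum all per-CPU deltas; the _positive variant
     * clamps at zero, as the /proc output wants. */
    static s64 example_read(void)
    {
            return percpu_counter_sum_positive(&example_count);
    }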
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index cd975743bcd2..dff8bc4e0fac 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -247,7 +247,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) | |||
247 | } | 247 | } |
248 | 248 | ||
249 | if (inet->recverr) { | 249 | if (inet->recverr) { |
250 | struct iphdr *iph = (struct iphdr*)skb->data; | 250 | struct iphdr *iph = (struct iphdr *)skb->data; |
251 | u8 *payload = skb->data + (iph->ihl << 2); | 251 | u8 *payload = skb->data + (iph->ihl << 2); |
252 | 252 | ||
253 | if (inet->hdrincl) | 253 | if (inet->hdrincl) |
@@ -465,7 +465,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
465 | */ | 465 | */ |
466 | 466 | ||
467 | if (msg->msg_namelen) { | 467 | if (msg->msg_namelen) { |
468 | struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name; | 468 | struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; |
469 | err = -EINVAL; | 469 | err = -EINVAL; |
470 | if (msg->msg_namelen < sizeof(*usin)) | 470 | if (msg->msg_namelen < sizeof(*usin)) |
471 | goto out; | 471 | goto out; |
@@ -572,7 +572,7 @@ back_from_confirm: | |||
572 | ipc.addr = rt->rt_dst; | 572 | ipc.addr = rt->rt_dst; |
573 | lock_sock(sk); | 573 | lock_sock(sk); |
574 | err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, | 574 | err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, |
575 | &ipc, rt, msg->msg_flags); | 575 | &ipc, &rt, msg->msg_flags); |
576 | if (err) | 576 | if (err) |
577 | ip_flush_pending_frames(sk); | 577 | ip_flush_pending_frames(sk); |
578 | else if (!(msg->msg_flags & MSG_MORE)) | 578 | else if (!(msg->msg_flags & MSG_MORE)) |
@@ -851,7 +851,7 @@ struct proto raw_prot = { | |||
851 | static struct sock *raw_get_first(struct seq_file *seq) | 851 | static struct sock *raw_get_first(struct seq_file *seq) |
852 | { | 852 | { |
853 | struct sock *sk; | 853 | struct sock *sk; |
854 | struct raw_iter_state* state = raw_seq_private(seq); | 854 | struct raw_iter_state *state = raw_seq_private(seq); |
855 | 855 | ||
856 | for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; | 856 | for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; |
857 | ++state->bucket) { | 857 | ++state->bucket) { |
@@ -868,7 +868,7 @@ found: | |||
868 | 868 | ||
869 | static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) | 869 | static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) |
870 | { | 870 | { |
871 | struct raw_iter_state* state = raw_seq_private(seq); | 871 | struct raw_iter_state *state = raw_seq_private(seq); |
872 | 872 | ||
873 | do { | 873 | do { |
874 | sk = sk_next(sk); | 874 | sk = sk_next(sk); |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2ea6dcc3e2cc..77bfba975959 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; | |||
129 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; | 129 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; |
130 | static int ip_rt_min_advmss __read_mostly = 256; | 130 | static int ip_rt_min_advmss __read_mostly = 256; |
131 | static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; | 131 | static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; |
132 | static int rt_chain_length_max __read_mostly = 20; | ||
132 | 133 | ||
133 | static void rt_worker_func(struct work_struct *work); | 134 | static void rt_worker_func(struct work_struct *work); |
134 | static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); | 135 | static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); |
@@ -145,6 +146,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); | |||
145 | static void ipv4_link_failure(struct sk_buff *skb); | 146 | static void ipv4_link_failure(struct sk_buff *skb); |
146 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); | 147 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); |
147 | static int rt_garbage_collect(struct dst_ops *ops); | 148 | static int rt_garbage_collect(struct dst_ops *ops); |
149 | static void rt_emergency_hash_rebuild(struct net *net); | ||
148 | 150 | ||
149 | 151 | ||
150 | static struct dst_ops ipv4_dst_ops = { | 152 | static struct dst_ops ipv4_dst_ops = { |
@@ -158,7 +160,6 @@ static struct dst_ops ipv4_dst_ops = { | |||
158 | .link_failure = ipv4_link_failure, | 160 | .link_failure = ipv4_link_failure, |
159 | .update_pmtu = ip_rt_update_pmtu, | 161 | .update_pmtu = ip_rt_update_pmtu, |
160 | .local_out = __ip_local_out, | 162 | .local_out = __ip_local_out, |
161 | .entry_size = sizeof(struct rtable), | ||
162 | .entries = ATOMIC_INIT(0), | 163 | .entries = ATOMIC_INIT(0), |
163 | }; | 164 | }; |
164 | 165 | ||
@@ -201,6 +202,7 @@ const __u8 ip_tos2prio[16] = { | |||
201 | struct rt_hash_bucket { | 202 | struct rt_hash_bucket { |
202 | struct rtable *chain; | 203 | struct rtable *chain; |
203 | }; | 204 | }; |
205 | |||
204 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ | 206 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ |
205 | defined(CONFIG_PROVE_LOCKING) | 207 | defined(CONFIG_PROVE_LOCKING) |
206 | /* | 208 | /* |
@@ -674,6 +676,20 @@ static inline u32 rt_score(struct rtable *rt) | |||
674 | return score; | 676 | return score; |
675 | } | 677 | } |
676 | 678 | ||
679 | static inline bool rt_caching(const struct net *net) | ||
680 | { | ||
681 | return net->ipv4.current_rt_cache_rebuild_count <= | ||
682 | net->ipv4.sysctl_rt_cache_rebuild_count; | ||
683 | } | ||
684 | |||
685 | static inline bool compare_hash_inputs(const struct flowi *fl1, | ||
686 | const struct flowi *fl2) | ||
687 | { | ||
688 | return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | | ||
689 | (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | | ||
690 | (fl1->iif ^ fl2->iif)) == 0); | ||
691 | } | ||
692 | |||
677 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | 693 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) |
678 | { | 694 | { |
679 | return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | | 695 | return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | |
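[Editor's note] compare_hash_inputs() above (like compare_keys() after it) tests several fields for equality with a single comparison: each XOR is zero iff its pair matches, and the OR of the XORs is zero iff every pair matches. A userspace check of the idiom:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Branch-free multi-field equality: (a1^a2)|(b1^b2)|(c1^c2) == 0
     * exactly when all three pairs match. */
    static int inputs_equal(uint32_t d1, uint32_t s1, uint32_t i1,
                            uint32_t d2, uint32_t s2, uint32_t i2)
    {
            return ((d1 ^ d2) | (s1 ^ s2) | (i1 ^ i2)) == 0;
    }

    int main(void)
    {
            assert(inputs_equal(1, 2, 3, 1, 2, 3));
            assert(!inputs_equal(1, 2, 3, 1, 2, 4));
            puts("ok");
            return 0;
    }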
@@ -753,11 +769,24 @@ static void rt_do_flush(int process_context) | |||
753 | } | 769 | } |
754 | } | 770 | } |
755 | 771 | ||
772 | /* | ||
773 | * While freeing expired entries, we compute average chain length | ||
774 | * and standard deviation, using fixed-point arithmetic. | ||
775 | * This gives an estimate of rt_chain_length_max: | ||
776 | * rt_chain_length_max = max(elasticity, AVG + 4*SD) | ||
777 | * We use 3 bits for the fractional part, and 29 (or 61) for the magnitude. | ||
778 | */ | ||
779 | |||
780 | #define FRACT_BITS 3 | ||
781 | #define ONE (1UL << FRACT_BITS) | ||
782 | |||
756 | static void rt_check_expire(void) | 783 | static void rt_check_expire(void) |
757 | { | 784 | { |
758 | static unsigned int rover; | 785 | static unsigned int rover; |
759 | unsigned int i = rover, goal; | 786 | unsigned int i = rover, goal; |
760 | struct rtable *rth, **rthp; | 787 | struct rtable *rth, **rthp; |
788 | unsigned long length = 0, samples = 0; | ||
789 | unsigned long sum = 0, sum2 = 0; | ||
761 | u64 mult; | 790 | u64 mult; |
762 | 791 | ||
763 | mult = ((u64)ip_rt_gc_interval) << rt_hash_log; | 792 | mult = ((u64)ip_rt_gc_interval) << rt_hash_log; |
@@ -766,6 +795,7 @@ static void rt_check_expire(void) | |||
766 | goal = (unsigned int)mult; | 795 | goal = (unsigned int)mult; |
767 | if (goal > rt_hash_mask) | 796 | if (goal > rt_hash_mask) |
768 | goal = rt_hash_mask + 1; | 797 | goal = rt_hash_mask + 1; |
798 | length = 0; | ||
769 | for (; goal > 0; goal--) { | 799 | for (; goal > 0; goal--) { |
770 | unsigned long tmo = ip_rt_gc_timeout; | 800 | unsigned long tmo = ip_rt_gc_timeout; |
771 | 801 | ||
@@ -775,6 +805,8 @@ static void rt_check_expire(void) | |||
775 | if (need_resched()) | 805 | if (need_resched()) |
776 | cond_resched(); | 806 | cond_resched(); |
777 | 807 | ||
808 | samples++; | ||
809 | |||
778 | if (*rthp == NULL) | 810 | if (*rthp == NULL) |
779 | continue; | 811 | continue; |
780 | spin_lock_bh(rt_hash_lock_addr(i)); | 812 | spin_lock_bh(rt_hash_lock_addr(i)); |
@@ -789,11 +821,29 @@ static void rt_check_expire(void) | |||
789 | if (time_before_eq(jiffies, rth->u.dst.expires)) { | 821 | if (time_before_eq(jiffies, rth->u.dst.expires)) { |
790 | tmo >>= 1; | 822 | tmo >>= 1; |
791 | rthp = &rth->u.dst.rt_next; | 823 | rthp = &rth->u.dst.rt_next; |
824 | /* | ||
825 | * Only bump our length if the hash | ||
826 | * inputs on entries n and n+1 are not | ||
827 | * the same, we only count entries on | ||
828 | * a chain with equal hash inputs once | ||
829 | * so that entries for different QOS | ||
830 | * levels, and other non-hash input | ||
831 | * attributes don't unfairly skew | ||
832 | * the length computation | ||
833 | */ | ||
834 | if ((*rthp == NULL) || | ||
835 | !compare_hash_inputs(&(*rthp)->fl, | ||
836 | &rth->fl)) | ||
837 | length += ONE; | ||
792 | continue; | 838 | continue; |
793 | } | 839 | } |
794 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { | 840 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { |
795 | tmo >>= 1; | 841 | tmo >>= 1; |
796 | rthp = &rth->u.dst.rt_next; | 842 | rthp = &rth->u.dst.rt_next; |
843 | if ((*rthp == NULL) || | ||
844 | !compare_hash_inputs(&(*rthp)->fl, | ||
845 | &rth->fl)) | ||
846 | length += ONE; | ||
797 | continue; | 847 | continue; |
798 | } | 848 | } |
799 | 849 | ||
@@ -802,6 +852,15 @@ static void rt_check_expire(void) | |||
802 | rt_free(rth); | 852 | rt_free(rth); |
803 | } | 853 | } |
804 | spin_unlock_bh(rt_hash_lock_addr(i)); | 854 | spin_unlock_bh(rt_hash_lock_addr(i)); |
855 | sum += length; | ||
856 | sum2 += length*length; | ||
857 | } | ||
858 | if (samples) { | ||
859 | unsigned long avg = sum / samples; | ||
860 | unsigned long sd = int_sqrt(sum2 / samples - avg*avg); | ||
861 | rt_chain_length_max = max_t(unsigned long, | ||
862 | ip_rt_gc_elasticity, | ||
863 | (avg + 4*sd) >> FRACT_BITS); | ||
805 | } | 864 | } |
806 | rover = i; | 865 | rover = i; |
807 | } | 866 | } |
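[Editor's note] With FRACT_BITS == 3, chain lengths are accumulated in units of one eighth (ONE == 8), and the final right shift converts avg + 4*SD back to whole entries before it becomes the new rt_chain_length_max. A userspace rendition of the same arithmetic on a small hand-made sample:

    #include <stdio.h>

    #define FRACT_BITS 3
    #define ONE (1UL << FRACT_BITS)

    /* Integer square root, standing in for the kernel's int_sqrt(). */
    static unsigned long isqrt(unsigned long x)
    {
            unsigned long r = 0;

            while ((r + 1) * (r + 1) <= x)
                    r++;
            return r;
    }

    int main(void)
    {
            /* Four sampled chains of length 2, 2, 4 and 8, accumulated
             * in eighths (ONE == 8) as rt_check_expire() does. */
            unsigned long lengths[] = { 2 * ONE, 2 * ONE, 4 * ONE, 8 * ONE };
            unsigned long sum = 0, sum2 = 0, samples = 4, avg, sd, i;

            for (i = 0; i < samples; i++) {
                    sum += lengths[i];
                    sum2 += lengths[i] * lengths[i];
            }
            avg = sum / samples;                    /* 32/8 = 4.0 */
            sd = isqrt(sum2 / samples - avg * avg);

            /* Shift converts avg + 4*SD back to whole chain entries. */
            printf("chain limit ~ %lu\n", (avg + 4 * sd) >> FRACT_BITS);
            return 0;
    }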
@@ -851,6 +910,26 @@ static void rt_secret_rebuild(unsigned long __net) | |||
851 | mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); | 910 | mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); |
852 | } | 911 | } |
853 | 912 | ||
913 | static void rt_secret_rebuild_oneshot(struct net *net) | ||
914 | { | ||
915 | del_timer_sync(&net->ipv4.rt_secret_timer); | ||
916 | rt_cache_invalidate(net); | ||
917 | if (ip_rt_secret_interval) { | ||
918 | net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; | ||
919 | add_timer(&net->ipv4.rt_secret_timer); | ||
920 | } | ||
921 | } | ||
922 | |||
923 | static void rt_emergency_hash_rebuild(struct net *net) | ||
924 | { | ||
925 | if (net_ratelimit()) { | ||
926 | printk(KERN_WARNING "Route hash chain too long!\n"); | ||
927 | printk(KERN_WARNING "Adjust your secret_interval!\n"); | ||
928 | } | ||
929 | |||
930 | rt_secret_rebuild_oneshot(net); | ||
931 | } | ||
932 | |||
854 | /* | 933 | /* |
855 | Short description of GC goals. | 934 | Short description of GC goals. |
856 | 935 | ||
@@ -989,6 +1068,7 @@ out: return 0; | |||
989 | static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) | 1068 | static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) |
990 | { | 1069 | { |
991 | struct rtable *rth, **rthp; | 1070 | struct rtable *rth, **rthp; |
1071 | struct rtable *rthi; | ||
992 | unsigned long now; | 1072 | unsigned long now; |
993 | struct rtable *cand, **candp; | 1073 | struct rtable *cand, **candp; |
994 | u32 min_score; | 1074 | u32 min_score; |
@@ -1002,7 +1082,13 @@ restart: | |||
1002 | candp = NULL; | 1082 | candp = NULL; |
1003 | now = jiffies; | 1083 | now = jiffies; |
1004 | 1084 | ||
1085 | if (!rt_caching(dev_net(rt->u.dst.dev))) { | ||
1086 | rt_drop(rt); | ||
1087 | return 0; | ||
1088 | } | ||
1089 | |||
1005 | rthp = &rt_hash_table[hash].chain; | 1090 | rthp = &rt_hash_table[hash].chain; |
1091 | rthi = NULL; | ||
1006 | 1092 | ||
1007 | spin_lock_bh(rt_hash_lock_addr(hash)); | 1093 | spin_lock_bh(rt_hash_lock_addr(hash)); |
1008 | while ((rth = *rthp) != NULL) { | 1094 | while ((rth = *rthp) != NULL) { |
@@ -1048,6 +1134,17 @@ restart: | |||
1048 | chain_length++; | 1134 | chain_length++; |
1049 | 1135 | ||
1050 | rthp = &rth->u.dst.rt_next; | 1136 | rthp = &rth->u.dst.rt_next; |
1137 | |||
1138 | /* | ||
1139 | * Check to see if the next entry in the chain | ||
1140 | * contains the same hash input values as rt. If it does, | ||
1141 | * this is where we will insert into the list, instead of | ||
1142 | * at the head. This groups entries that differ by aspects not | ||
1143 | * relevant to the hash function together, which we use to adjust | ||
1144 | * our chain length. | ||
1145 | */ | ||
1146 | if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) | ||
1147 | rthi = rth; | ||
1051 | } | 1148 | } |
1052 | 1149 | ||
1053 | if (cand) { | 1150 | if (cand) { |
@@ -1061,6 +1158,16 @@ restart: | |||
1061 | *candp = cand->u.dst.rt_next; | 1158 | *candp = cand->u.dst.rt_next; |
1062 | rt_free(cand); | 1159 | rt_free(cand); |
1063 | } | 1160 | } |
1161 | } else { | ||
1162 | if (chain_length > rt_chain_length_max) { | ||
1163 | struct net *net = dev_net(rt->u.dst.dev); | ||
1164 | int num = ++net->ipv4.current_rt_cache_rebuild_count; | ||
1165 | if (!rt_caching(dev_net(rt->u.dst.dev))) { | ||
1166 | printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", | ||
1167 | rt->u.dst.dev->name, num); | ||
1168 | } | ||
1169 | rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); | ||
1170 | } | ||
1064 | } | 1171 | } |
1065 | 1172 | ||
1066 | /* Try to bind route to arp only if it is output | 1173 | /* Try to bind route to arp only if it is output |
@@ -1098,14 +1205,17 @@ restart: | |||
1098 | } | 1205 | } |
1099 | } | 1206 | } |
1100 | 1207 | ||
1101 | rt->u.dst.rt_next = rt_hash_table[hash].chain; | 1208 | if (rthi) |
1209 | rt->u.dst.rt_next = rthi->u.dst.rt_next; | ||
1210 | else | ||
1211 | rt->u.dst.rt_next = rt_hash_table[hash].chain; | ||
1212 | |||
1102 | #if RT_CACHE_DEBUG >= 2 | 1213 | #if RT_CACHE_DEBUG >= 2 |
1103 | if (rt->u.dst.rt_next) { | 1214 | if (rt->u.dst.rt_next) { |
1104 | struct rtable *trt; | 1215 | struct rtable *trt; |
1105 | printk(KERN_DEBUG "rt_cache @%02x: " NIPQUAD_FMT, hash, | 1216 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); |
1106 | NIPQUAD(rt->rt_dst)); | ||
1107 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) | 1217 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) |
1108 | printk(" . " NIPQUAD_FMT, NIPQUAD(trt->rt_dst)); | 1218 | printk(" . %pI4", &trt->rt_dst); |
1109 | printk("\n"); | 1219 | printk("\n"); |
1110 | } | 1220 | } |
1111 | #endif | 1221 | #endif |
@@ -1114,7 +1224,11 @@ restart: | |||
1114 | * previous writes to rt are committed to memory | 1224 | * previous writes to rt are committed to memory
1115 | * before making rt visible to other CPUs. | 1225 | * before making rt visible to other CPUs.
1116 | */ | 1226 | */ |
1117 | rcu_assign_pointer(rt_hash_table[hash].chain, rt); | 1227 | if (rthi) |
1228 | rcu_assign_pointer(rthi->u.dst.rt_next, rt); | ||
1229 | else | ||
1230 | rcu_assign_pointer(rt_hash_table[hash].chain, rt); | ||
1231 | |||
1118 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1232 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1119 | *rp = rt; | 1233 | *rp = rt; |
1120 | return 0; | 1234 | return 0; |
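[Editor's note] Both insertion paths finish with rcu_assign_pointer(), whose write barrier (per the comment above) orders the initialization of the new rtable before the store that makes it reachable, whether it is linked after rthi or at the chain head; lockless readers pair with it via rcu_dereference(). A schematic kernel-context sketch with a generic node type:

    #include <linux/rcupdate.h>

    struct item {
            int val;
            struct item *next;
    };

    static struct item *chain_head;

    /* Writer, under the bucket lock: initialize the node completely,
     * then publish it; rcu_assign_pointer() keeps those stores ordered
     * ahead of the pointer update. */
    static void publish(struct item *it, int val)
    {
            it->val = val;
            it->next = chain_head;
            rcu_assign_pointer(chain_head, it);
    }

    /* Reader, under rcu_read_lock(): rcu_dereference() pairs with the
     * writer's barrier so the node is seen fully initialized. */
    static int first_val(void)
    {
            struct item *it = rcu_dereference(chain_head);

            return it ? it->val : -1;
    }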
@@ -1217,6 +1331,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1217 | || ipv4_is_zeronet(new_gw)) | 1331 | || ipv4_is_zeronet(new_gw)) |
1218 | goto reject_redirect; | 1332 | goto reject_redirect; |
1219 | 1333 | ||
1334 | if (!rt_caching(net)) | ||
1335 | goto reject_redirect; | ||
1336 | |||
1220 | if (!IN_DEV_SHARED_MEDIA(in_dev)) { | 1337 | if (!IN_DEV_SHARED_MEDIA(in_dev)) { |
1221 | if (!inet_addr_onlink(in_dev, new_gw, old_gw)) | 1338 | if (!inet_addr_onlink(in_dev, new_gw, old_gw)) |
1222 | goto reject_redirect; | 1339 | goto reject_redirect; |
@@ -1267,7 +1384,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1267 | 1384 | ||
1268 | /* Copy all the information. */ | 1385 | /* Copy all the information. */ |
1269 | *rt = *rth; | 1386 | *rt = *rth; |
1270 | INIT_RCU_HEAD(&rt->u.dst.rcu_head); | ||
1271 | rt->u.dst.__use = 1; | 1387 | rt->u.dst.__use = 1; |
1272 | atomic_set(&rt->u.dst.__refcnt, 1); | 1388 | atomic_set(&rt->u.dst.__refcnt, 1); |
1273 | rt->u.dst.child = NULL; | 1389 | rt->u.dst.child = NULL; |
@@ -1280,7 +1396,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1280 | rt->u.dst.path = &rt->u.dst; | 1396 | rt->u.dst.path = &rt->u.dst; |
1281 | rt->u.dst.neighbour = NULL; | 1397 | rt->u.dst.neighbour = NULL; |
1282 | rt->u.dst.hh = NULL; | 1398 | rt->u.dst.hh = NULL; |
1399 | #ifdef CONFIG_XFRM | ||
1283 | rt->u.dst.xfrm = NULL; | 1400 | rt->u.dst.xfrm = NULL; |
1401 | #endif | ||
1284 | rt->rt_genid = rt_genid(net); | 1402 | rt->rt_genid = rt_genid(net); |
1285 | rt->rt_flags |= RTCF_REDIRECTED; | 1403 | rt->rt_flags |= RTCF_REDIRECTED; |
1286 | 1404 | ||
@@ -1324,11 +1442,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1324 | reject_redirect: | 1442 | reject_redirect: |
1325 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1443 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1326 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 1444 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) |
1327 | printk(KERN_INFO "Redirect from " NIPQUAD_FMT " on %s about " | 1445 | printk(KERN_INFO "Redirect from %pI4 on %s about %pI4 ignored.\n" |
1328 | NIPQUAD_FMT " ignored.\n" | 1446 | " Advised path = %pI4 -> %pI4\n", |
1329 | " Advised path = " NIPQUAD_FMT " -> " NIPQUAD_FMT "\n", | 1447 | &old_gw, dev->name, &new_gw, |
1330 | NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), | 1448 | &saddr, &daddr); |
1331 | NIPQUAD(saddr), NIPQUAD(daddr)); | ||
1332 | #endif | 1449 | #endif |
1333 | in_dev_put(in_dev); | 1450 | in_dev_put(in_dev); |
1334 | } | 1451 | } |
@@ -1348,9 +1465,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1348 | rt->fl.oif, | 1465 | rt->fl.oif, |
1349 | rt_genid(dev_net(dst->dev))); | 1466 | rt_genid(dev_net(dst->dev))); |
1350 | #if RT_CACHE_DEBUG >= 1 | 1467 | #if RT_CACHE_DEBUG >= 1 |
1351 | printk(KERN_DEBUG "ipv4_negative_advice: redirect to " | 1468 | printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", |
1352 | NIPQUAD_FMT "/%02x dropped\n", | 1469 | &rt->rt_dst, rt->fl.fl4_tos); |
1353 | NIPQUAD(rt->rt_dst), rt->fl.fl4_tos); | ||
1354 | #endif | 1470 | #endif |
1355 | rt_del(hash, rt); | 1471 | rt_del(hash, rt); |
1356 | ret = NULL; | 1472 | ret = NULL; |
@@ -1414,10 +1530,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1414 | if (IN_DEV_LOG_MARTIANS(in_dev) && | 1530 | if (IN_DEV_LOG_MARTIANS(in_dev) && |
1415 | rt->u.dst.rate_tokens == ip_rt_redirect_number && | 1531 | rt->u.dst.rate_tokens == ip_rt_redirect_number && |
1416 | net_ratelimit()) | 1532 | net_ratelimit()) |
1417 | printk(KERN_WARNING "host " NIPQUAD_FMT "/if%d ignores " | 1533 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", |
1418 | "redirects for " NIPQUAD_FMT " to " NIPQUAD_FMT ".\n", | 1534 | &rt->rt_src, rt->rt_iif, |
1419 | NIPQUAD(rt->rt_src), rt->rt_iif, | 1535 | &rt->rt_dst, &rt->rt_gateway); |
1420 | NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway)); | ||
1421 | #endif | 1536 | #endif |
1422 | } | 1537 | } |
1423 | out: | 1538 | out: |
@@ -1610,8 +1725,8 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1610 | 1725 | ||
1611 | static int ip_rt_bug(struct sk_buff *skb) | 1726 | static int ip_rt_bug(struct sk_buff *skb) |
1612 | { | 1727 | { |
1613 | printk(KERN_DEBUG "ip_rt_bug: " NIPQUAD_FMT " -> " NIPQUAD_FMT ", %s\n", | 1728 | printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n", |
1614 | NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr), | 1729 | &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, |
1615 | skb->dev ? skb->dev->name : "?"); | 1730 | skb->dev ? skb->dev->name : "?"); |
1616 | kfree_skb(skb); | 1731 | kfree_skb(skb); |
1617 | return 0; | 1732 | return 0; |
@@ -1788,9 +1903,8 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
1788 | * RFC1812 recommendation, if source is martian, | 1903 | * RFC1812 recommendation, if source is martian, |
1789 | * the only hint is MAC header. | 1904 | * the only hint is MAC header. |
1790 | */ | 1905 | */ |
1791 | printk(KERN_WARNING "martian source " NIPQUAD_FMT " from " | 1906 | printk(KERN_WARNING "martian source %pI4 from %pI4, on dev %s\n", |
1792 | NIPQUAD_FMT", on dev %s\n", | 1907 | &daddr, &saddr, dev->name); |
1793 | NIPQUAD(daddr), NIPQUAD(saddr), dev->name); | ||
1794 | if (dev->hard_header_len && skb_mac_header_was_set(skb)) { | 1908 | if (dev->hard_header_len && skb_mac_header_was_set(skb)) { |
1795 | int i; | 1909 | int i; |
1796 | const unsigned char *p = skb_mac_header(skb); | 1910 | const unsigned char *p = skb_mac_header(skb); |
@@ -2099,9 +2213,8 @@ martian_destination: | |||
2099 | RT_CACHE_STAT_INC(in_martian_dst); | 2213 | RT_CACHE_STAT_INC(in_martian_dst); |
2100 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 2214 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
2101 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 2215 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) |
2102 | printk(KERN_WARNING "martian destination " NIPQUAD_FMT " from " | 2216 | printk(KERN_WARNING "martian destination %pI4 from %pI4, dev %s\n", |
2103 | NIPQUAD_FMT ", dev %s\n", | 2217 | &daddr, &saddr, dev->name); |
2104 | NIPQUAD(daddr), NIPQUAD(saddr), dev->name); | ||
2105 | #endif | 2218 | #endif |
2106 | 2219 | ||
2107 | e_hostunreach: | 2220 | e_hostunreach: |
@@ -2130,6 +2243,10 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2130 | struct net *net; | 2243 | struct net *net; |
2131 | 2244 | ||
2132 | net = dev_net(dev); | 2245 | net = dev_net(dev); |
2246 | |||
2247 | if (!rt_caching(net)) | ||
2248 | goto skip_cache; | ||
2249 | |||
2133 | tos &= IPTOS_RT_MASK; | 2250 | tos &= IPTOS_RT_MASK; |
2134 | hash = rt_hash(daddr, saddr, iif, rt_genid(net)); | 2251 | hash = rt_hash(daddr, saddr, iif, rt_genid(net)); |
2135 | 2252 | ||
@@ -2154,6 +2271,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2154 | } | 2271 | } |
2155 | rcu_read_unlock(); | 2272 | rcu_read_unlock(); |
2156 | 2273 | ||
2274 | skip_cache: | ||
2157 | /* Multicast recognition logic is moved from route cache to here. | 2275 | /* Multicast recognition logic is moved from route cache to here. |
2158 | The problem was that too many Ethernet cards have broken/missing | 2276 | The problem was that too many Ethernet cards have broken/missing |
2159 | hardware multicast filters :-( As result the host on multicasting | 2277 | hardware multicast filters :-( As result the host on multicasting |
@@ -2539,6 +2657,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2539 | unsigned hash; | 2657 | unsigned hash; |
2540 | struct rtable *rth; | 2658 | struct rtable *rth; |
2541 | 2659 | ||
2660 | if (!rt_caching(net)) | ||
2661 | goto slow_output; | ||
2662 | |||
2542 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); | 2663 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); |
2543 | 2664 | ||
2544 | rcu_read_lock_bh(); | 2665 | rcu_read_lock_bh(); |
@@ -2563,6 +2684,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2563 | } | 2684 | } |
2564 | rcu_read_unlock_bh(); | 2685 | rcu_read_unlock_bh(); |
2565 | 2686 | ||
2687 | slow_output: | ||
2566 | return ip_route_output_slow(net, rp, flp); | 2688 | return ip_route_output_slow(net, rp, flp); |
2567 | } | 2689 | } |
2568 | 2690 | ||
@@ -2578,7 +2700,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
2578 | .destroy = ipv4_dst_destroy, | 2700 | .destroy = ipv4_dst_destroy, |
2579 | .check = ipv4_dst_check, | 2701 | .check = ipv4_dst_check, |
2580 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2702 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
2581 | .entry_size = sizeof(struct rtable), | ||
2582 | .entries = ATOMIC_INIT(0), | 2703 | .entries = ATOMIC_INIT(0), |
2583 | }; | 2704 | }; |
2584 | 2705 | ||
@@ -2640,7 +2761,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, | |||
2640 | flp->fl4_src = (*rp)->rt_src; | 2761 | flp->fl4_src = (*rp)->rt_src; |
2641 | if (!flp->fl4_dst) | 2762 | if (!flp->fl4_dst) |
2642 | flp->fl4_dst = (*rp)->rt_dst; | 2763 | flp->fl4_dst = (*rp)->rt_dst; |
2643 | err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, | 2764 | err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, |
2644 | flags ? XFRM_LOOKUP_WAIT : 0); | 2765 | flags ? XFRM_LOOKUP_WAIT : 0); |
2645 | if (err == -EREMOTE) | 2766 | if (err == -EREMOTE) |
2646 | err = ipv4_dst_blackhole(net, rp, flp); | 2767 | err = ipv4_dst_blackhole(net, rp, flp); |
@@ -2995,7 +3116,7 @@ static ctl_table ipv4_route_table[] = { | |||
2995 | .data = &ipv4_dst_ops.gc_thresh, | 3116 | .data = &ipv4_dst_ops.gc_thresh, |
2996 | .maxlen = sizeof(int), | 3117 | .maxlen = sizeof(int), |
2997 | .mode = 0644, | 3118 | .mode = 0644, |
2998 | .proc_handler = &proc_dointvec, | 3119 | .proc_handler = proc_dointvec, |
2999 | }, | 3120 | }, |
3000 | { | 3121 | { |
3001 | .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, | 3122 | .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, |
@@ -3003,7 +3124,7 @@ static ctl_table ipv4_route_table[] = { | |||
3003 | .data = &ip_rt_max_size, | 3124 | .data = &ip_rt_max_size, |
3004 | .maxlen = sizeof(int), | 3125 | .maxlen = sizeof(int), |
3005 | .mode = 0644, | 3126 | .mode = 0644, |
3006 | .proc_handler = &proc_dointvec, | 3127 | .proc_handler = proc_dointvec, |
3007 | }, | 3128 | }, |
3008 | { | 3129 | { |
3009 | /* Deprecated. Use gc_min_interval_ms */ | 3130 | /* Deprecated. Use gc_min_interval_ms */ |
@@ -3013,8 +3134,8 @@ static ctl_table ipv4_route_table[] = { | |||
3013 | .data = &ip_rt_gc_min_interval, | 3134 | .data = &ip_rt_gc_min_interval, |
3014 | .maxlen = sizeof(int), | 3135 | .maxlen = sizeof(int), |
3015 | .mode = 0644, | 3136 | .mode = 0644, |
3016 | .proc_handler = &proc_dointvec_jiffies, | 3137 | .proc_handler = proc_dointvec_jiffies, |
3017 | .strategy = &sysctl_jiffies, | 3138 | .strategy = sysctl_jiffies, |
3018 | }, | 3139 | }, |
3019 | { | 3140 | { |
3020 | .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, | 3141 | .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, |
@@ -3022,8 +3143,8 @@ static ctl_table ipv4_route_table[] = { | |||
3022 | .data = &ip_rt_gc_min_interval, | 3143 | .data = &ip_rt_gc_min_interval, |
3023 | .maxlen = sizeof(int), | 3144 | .maxlen = sizeof(int), |
3024 | .mode = 0644, | 3145 | .mode = 0644, |
3025 | .proc_handler = &proc_dointvec_ms_jiffies, | 3146 | .proc_handler = proc_dointvec_ms_jiffies, |
3026 | .strategy = &sysctl_ms_jiffies, | 3147 | .strategy = sysctl_ms_jiffies, |
3027 | }, | 3148 | }, |
3028 | { | 3149 | { |
3029 | .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, | 3150 | .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, |
@@ -3031,8 +3152,8 @@ static ctl_table ipv4_route_table[] = { | |||
3031 | .data = &ip_rt_gc_timeout, | 3152 | .data = &ip_rt_gc_timeout, |
3032 | .maxlen = sizeof(int), | 3153 | .maxlen = sizeof(int), |
3033 | .mode = 0644, | 3154 | .mode = 0644, |
3034 | .proc_handler = &proc_dointvec_jiffies, | 3155 | .proc_handler = proc_dointvec_jiffies, |
3035 | .strategy = &sysctl_jiffies, | 3156 | .strategy = sysctl_jiffies, |
3036 | }, | 3157 | }, |
3037 | { | 3158 | { |
3038 | .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, | 3159 | .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, |
@@ -3040,8 +3161,8 @@ static ctl_table ipv4_route_table[] = { | |||
3040 | .data = &ip_rt_gc_interval, | 3161 | .data = &ip_rt_gc_interval, |
3041 | .maxlen = sizeof(int), | 3162 | .maxlen = sizeof(int), |
3042 | .mode = 0644, | 3163 | .mode = 0644, |
3043 | .proc_handler = &proc_dointvec_jiffies, | 3164 | .proc_handler = proc_dointvec_jiffies, |
3044 | .strategy = &sysctl_jiffies, | 3165 | .strategy = sysctl_jiffies, |
3045 | }, | 3166 | }, |
3046 | { | 3167 | { |
3047 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, | 3168 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, |
@@ -3049,7 +3170,7 @@ static ctl_table ipv4_route_table[] = { | |||
3049 | .data = &ip_rt_redirect_load, | 3170 | .data = &ip_rt_redirect_load, |
3050 | .maxlen = sizeof(int), | 3171 | .maxlen = sizeof(int), |
3051 | .mode = 0644, | 3172 | .mode = 0644, |
3052 | .proc_handler = &proc_dointvec, | 3173 | .proc_handler = proc_dointvec, |
3053 | }, | 3174 | }, |
3054 | { | 3175 | { |
3055 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, | 3176 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, |
@@ -3057,7 +3178,7 @@ static ctl_table ipv4_route_table[] = { | |||
3057 | .data = &ip_rt_redirect_number, | 3178 | .data = &ip_rt_redirect_number, |
3058 | .maxlen = sizeof(int), | 3179 | .maxlen = sizeof(int), |
3059 | .mode = 0644, | 3180 | .mode = 0644, |
3060 | .proc_handler = &proc_dointvec, | 3181 | .proc_handler = proc_dointvec, |
3061 | }, | 3182 | }, |
3062 | { | 3183 | { |
3063 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, | 3184 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, |
@@ -3065,7 +3186,7 @@ static ctl_table ipv4_route_table[] = { | |||
3065 | .data = &ip_rt_redirect_silence, | 3186 | .data = &ip_rt_redirect_silence, |
3066 | .maxlen = sizeof(int), | 3187 | .maxlen = sizeof(int), |
3067 | .mode = 0644, | 3188 | .mode = 0644, |
3068 | .proc_handler = &proc_dointvec, | 3189 | .proc_handler = proc_dointvec, |
3069 | }, | 3190 | }, |
3070 | { | 3191 | { |
3071 | .ctl_name = NET_IPV4_ROUTE_ERROR_COST, | 3192 | .ctl_name = NET_IPV4_ROUTE_ERROR_COST, |
@@ -3073,7 +3194,7 @@ static ctl_table ipv4_route_table[] = { | |||
3073 | .data = &ip_rt_error_cost, | 3194 | .data = &ip_rt_error_cost, |
3074 | .maxlen = sizeof(int), | 3195 | .maxlen = sizeof(int), |
3075 | .mode = 0644, | 3196 | .mode = 0644, |
3076 | .proc_handler = &proc_dointvec, | 3197 | .proc_handler = proc_dointvec, |
3077 | }, | 3198 | }, |
3078 | { | 3199 | { |
3079 | .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, | 3200 | .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, |
@@ -3081,7 +3202,7 @@ static ctl_table ipv4_route_table[] = { | |||
3081 | .data = &ip_rt_error_burst, | 3202 | .data = &ip_rt_error_burst, |
3082 | .maxlen = sizeof(int), | 3203 | .maxlen = sizeof(int), |
3083 | .mode = 0644, | 3204 | .mode = 0644, |
3084 | .proc_handler = &proc_dointvec, | 3205 | .proc_handler = proc_dointvec, |
3085 | }, | 3206 | }, |
3086 | { | 3207 | { |
3087 | .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, | 3208 | .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, |
@@ -3089,7 +3210,7 @@ static ctl_table ipv4_route_table[] = { | |||
3089 | .data = &ip_rt_gc_elasticity, | 3210 | .data = &ip_rt_gc_elasticity, |
3090 | .maxlen = sizeof(int), | 3211 | .maxlen = sizeof(int), |
3091 | .mode = 0644, | 3212 | .mode = 0644, |
3092 | .proc_handler = &proc_dointvec, | 3213 | .proc_handler = proc_dointvec, |
3093 | }, | 3214 | }, |
3094 | { | 3215 | { |
3095 | .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, | 3216 | .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, |
@@ -3097,8 +3218,8 @@ static ctl_table ipv4_route_table[] = { | |||
3097 | .data = &ip_rt_mtu_expires, | 3218 | .data = &ip_rt_mtu_expires, |
3098 | .maxlen = sizeof(int), | 3219 | .maxlen = sizeof(int), |
3099 | .mode = 0644, | 3220 | .mode = 0644, |
3100 | .proc_handler = &proc_dointvec_jiffies, | 3221 | .proc_handler = proc_dointvec_jiffies, |
3101 | .strategy = &sysctl_jiffies, | 3222 | .strategy = sysctl_jiffies, |
3102 | }, | 3223 | }, |
3103 | { | 3224 | { |
3104 | .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, | 3225 | .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, |
@@ -3106,7 +3227,7 @@ static ctl_table ipv4_route_table[] = { | |||
3106 | .data = &ip_rt_min_pmtu, | 3227 | .data = &ip_rt_min_pmtu, |
3107 | .maxlen = sizeof(int), | 3228 | .maxlen = sizeof(int), |
3108 | .mode = 0644, | 3229 | .mode = 0644, |
3109 | .proc_handler = &proc_dointvec, | 3230 | .proc_handler = proc_dointvec, |
3110 | }, | 3231 | }, |
3111 | { | 3232 | { |
3112 | .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, | 3233 | .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, |
@@ -3114,7 +3235,7 @@ static ctl_table ipv4_route_table[] = { | |||
3114 | .data = &ip_rt_min_advmss, | 3235 | .data = &ip_rt_min_advmss, |
3115 | .maxlen = sizeof(int), | 3236 | .maxlen = sizeof(int), |
3116 | .mode = 0644, | 3237 | .mode = 0644, |
3117 | .proc_handler = &proc_dointvec, | 3238 | .proc_handler = proc_dointvec, |
3118 | }, | 3239 | }, |
3119 | { | 3240 | { |
3120 | .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, | 3241 | .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, |
@@ -3122,8 +3243,8 @@ static ctl_table ipv4_route_table[] = { | |||
3122 | .data = &ip_rt_secret_interval, | 3243 | .data = &ip_rt_secret_interval, |
3123 | .maxlen = sizeof(int), | 3244 | .maxlen = sizeof(int), |
3124 | .mode = 0644, | 3245 | .mode = 0644, |
3125 | .proc_handler = &ipv4_sysctl_rt_secret_interval, | 3246 | .proc_handler = ipv4_sysctl_rt_secret_interval, |
3126 | .strategy = &ipv4_sysctl_rt_secret_interval_strategy, | 3247 | .strategy = ipv4_sysctl_rt_secret_interval_strategy, |
3127 | }, | 3248 | }, |
3128 | { .ctl_name = 0 } | 3249 | { .ctl_name = 0 } |
3129 | }; | 3250 | }; |
@@ -3151,8 +3272,8 @@ static struct ctl_table ipv4_route_flush_table[] = { | |||
3151 | .procname = "flush", | 3272 | .procname = "flush", |
3152 | .maxlen = sizeof(int), | 3273 | .maxlen = sizeof(int), |
3153 | .mode = 0200, | 3274 | .mode = 0200, |
3154 | .proc_handler = &ipv4_sysctl_rtcache_flush, | 3275 | .proc_handler = ipv4_sysctl_rtcache_flush, |
3155 | .strategy = &ipv4_sysctl_rtcache_flush_strategy, | 3276 | .strategy = ipv4_sysctl_rtcache_flush_strategy, |
3156 | }, | 3277 | }, |
3157 | { .ctl_name = 0 }, | 3278 | { .ctl_name = 0 }, |
3158 | }; | 3279 | }; |
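The long run of ".proc_handler = &proc_dointvec" to ".proc_handler = proc_dointvec" changes above (and in sysctl_net_ipv4.c below) is a pure cleanup: in C a function designator already decays to a function pointer, so the explicit address-of operator is redundant. A minimal standalone sketch of the rule, not kernel code:

    #include <stdio.h>

    static int handler(int v) { return v + 1; }

    int main(void)
    {
            int (*h1)(int) = handler;   /* implicit function-to-pointer decay */
            int (*h2)(int) = &handler;  /* explicit &, same pointer value */

            printf("%d %d %d\n", h1 == h2, h1(1), h2(1)); /* prints "1 2 2" */
            return 0;
    }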
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1bb10df8ce7d..4710d219f06a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -195,7 +195,7 @@ static struct ctl_table ipv4_table[] = { | |||
195 | .data = &sysctl_tcp_timestamps, | 195 | .data = &sysctl_tcp_timestamps, |
196 | .maxlen = sizeof(int), | 196 | .maxlen = sizeof(int), |
197 | .mode = 0644, | 197 | .mode = 0644, |
198 | .proc_handler = &proc_dointvec | 198 | .proc_handler = proc_dointvec |
199 | }, | 199 | }, |
200 | { | 200 | { |
201 | .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, | 201 | .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, |
@@ -203,7 +203,7 @@ static struct ctl_table ipv4_table[] = { | |||
203 | .data = &sysctl_tcp_window_scaling, | 203 | .data = &sysctl_tcp_window_scaling, |
204 | .maxlen = sizeof(int), | 204 | .maxlen = sizeof(int), |
205 | .mode = 0644, | 205 | .mode = 0644, |
206 | .proc_handler = &proc_dointvec | 206 | .proc_handler = proc_dointvec |
207 | }, | 207 | }, |
208 | { | 208 | { |
209 | .ctl_name = NET_IPV4_TCP_SACK, | 209 | .ctl_name = NET_IPV4_TCP_SACK, |
@@ -211,7 +211,7 @@ static struct ctl_table ipv4_table[] = { | |||
211 | .data = &sysctl_tcp_sack, | 211 | .data = &sysctl_tcp_sack, |
212 | .maxlen = sizeof(int), | 212 | .maxlen = sizeof(int), |
213 | .mode = 0644, | 213 | .mode = 0644, |
214 | .proc_handler = &proc_dointvec | 214 | .proc_handler = proc_dointvec |
215 | }, | 215 | }, |
216 | { | 216 | { |
217 | .ctl_name = NET_IPV4_TCP_RETRANS_COLLAPSE, | 217 | .ctl_name = NET_IPV4_TCP_RETRANS_COLLAPSE, |
@@ -219,7 +219,7 @@ static struct ctl_table ipv4_table[] = { | |||
219 | .data = &sysctl_tcp_retrans_collapse, | 219 | .data = &sysctl_tcp_retrans_collapse, |
220 | .maxlen = sizeof(int), | 220 | .maxlen = sizeof(int), |
221 | .mode = 0644, | 221 | .mode = 0644, |
222 | .proc_handler = &proc_dointvec | 222 | .proc_handler = proc_dointvec |
223 | }, | 223 | }, |
224 | { | 224 | { |
225 | .ctl_name = NET_IPV4_DEFAULT_TTL, | 225 | .ctl_name = NET_IPV4_DEFAULT_TTL, |
@@ -227,8 +227,8 @@ static struct ctl_table ipv4_table[] = { | |||
227 | .data = &sysctl_ip_default_ttl, | 227 | .data = &sysctl_ip_default_ttl, |
228 | .maxlen = sizeof(int), | 228 | .maxlen = sizeof(int), |
229 | .mode = 0644, | 229 | .mode = 0644, |
230 | .proc_handler = &ipv4_doint_and_flush, | 230 | .proc_handler = ipv4_doint_and_flush, |
231 | .strategy = &ipv4_doint_and_flush_strategy, | 231 | .strategy = ipv4_doint_and_flush_strategy, |
232 | .extra2 = &init_net, | 232 | .extra2 = &init_net, |
233 | }, | 233 | }, |
234 | { | 234 | { |
@@ -237,7 +237,7 @@ static struct ctl_table ipv4_table[] = { | |||
237 | .data = &ipv4_config.no_pmtu_disc, | 237 | .data = &ipv4_config.no_pmtu_disc, |
238 | .maxlen = sizeof(int), | 238 | .maxlen = sizeof(int), |
239 | .mode = 0644, | 239 | .mode = 0644, |
240 | .proc_handler = &proc_dointvec | 240 | .proc_handler = proc_dointvec |
241 | }, | 241 | }, |
242 | { | 242 | { |
243 | .ctl_name = NET_IPV4_NONLOCAL_BIND, | 243 | .ctl_name = NET_IPV4_NONLOCAL_BIND, |
@@ -245,7 +245,7 @@ static struct ctl_table ipv4_table[] = { | |||
245 | .data = &sysctl_ip_nonlocal_bind, | 245 | .data = &sysctl_ip_nonlocal_bind, |
246 | .maxlen = sizeof(int), | 246 | .maxlen = sizeof(int), |
247 | .mode = 0644, | 247 | .mode = 0644, |
248 | .proc_handler = &proc_dointvec | 248 | .proc_handler = proc_dointvec |
249 | }, | 249 | }, |
250 | { | 250 | { |
251 | .ctl_name = NET_IPV4_TCP_SYN_RETRIES, | 251 | .ctl_name = NET_IPV4_TCP_SYN_RETRIES, |
@@ -253,7 +253,7 @@ static struct ctl_table ipv4_table[] = { | |||
253 | .data = &sysctl_tcp_syn_retries, | 253 | .data = &sysctl_tcp_syn_retries, |
254 | .maxlen = sizeof(int), | 254 | .maxlen = sizeof(int), |
255 | .mode = 0644, | 255 | .mode = 0644, |
256 | .proc_handler = &proc_dointvec | 256 | .proc_handler = proc_dointvec |
257 | }, | 257 | }, |
258 | { | 258 | { |
259 | .ctl_name = NET_TCP_SYNACK_RETRIES, | 259 | .ctl_name = NET_TCP_SYNACK_RETRIES, |
@@ -261,7 +261,7 @@ static struct ctl_table ipv4_table[] = { | |||
261 | .data = &sysctl_tcp_synack_retries, | 261 | .data = &sysctl_tcp_synack_retries, |
262 | .maxlen = sizeof(int), | 262 | .maxlen = sizeof(int), |
263 | .mode = 0644, | 263 | .mode = 0644, |
264 | .proc_handler = &proc_dointvec | 264 | .proc_handler = proc_dointvec |
265 | }, | 265 | }, |
266 | { | 266 | { |
267 | .ctl_name = NET_TCP_MAX_ORPHANS, | 267 | .ctl_name = NET_TCP_MAX_ORPHANS, |
@@ -269,7 +269,7 @@ static struct ctl_table ipv4_table[] = { | |||
269 | .data = &sysctl_tcp_max_orphans, | 269 | .data = &sysctl_tcp_max_orphans, |
270 | .maxlen = sizeof(int), | 270 | .maxlen = sizeof(int), |
271 | .mode = 0644, | 271 | .mode = 0644, |
272 | .proc_handler = &proc_dointvec | 272 | .proc_handler = proc_dointvec |
273 | }, | 273 | }, |
274 | { | 274 | { |
275 | .ctl_name = NET_TCP_MAX_TW_BUCKETS, | 275 | .ctl_name = NET_TCP_MAX_TW_BUCKETS, |
@@ -277,7 +277,7 @@ static struct ctl_table ipv4_table[] = { | |||
277 | .data = &tcp_death_row.sysctl_max_tw_buckets, | 277 | .data = &tcp_death_row.sysctl_max_tw_buckets, |
278 | .maxlen = sizeof(int), | 278 | .maxlen = sizeof(int), |
279 | .mode = 0644, | 279 | .mode = 0644, |
280 | .proc_handler = &proc_dointvec | 280 | .proc_handler = proc_dointvec |
281 | }, | 281 | }, |
282 | { | 282 | { |
283 | .ctl_name = NET_IPV4_DYNADDR, | 283 | .ctl_name = NET_IPV4_DYNADDR, |
@@ -285,7 +285,7 @@ static struct ctl_table ipv4_table[] = { | |||
285 | .data = &sysctl_ip_dynaddr, | 285 | .data = &sysctl_ip_dynaddr, |
286 | .maxlen = sizeof(int), | 286 | .maxlen = sizeof(int), |
287 | .mode = 0644, | 287 | .mode = 0644, |
288 | .proc_handler = &proc_dointvec | 288 | .proc_handler = proc_dointvec |
289 | }, | 289 | }, |
290 | { | 290 | { |
291 | .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME, | 291 | .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME, |
@@ -293,8 +293,8 @@ static struct ctl_table ipv4_table[] = { | |||
293 | .data = &sysctl_tcp_keepalive_time, | 293 | .data = &sysctl_tcp_keepalive_time, |
294 | .maxlen = sizeof(int), | 294 | .maxlen = sizeof(int), |
295 | .mode = 0644, | 295 | .mode = 0644, |
296 | .proc_handler = &proc_dointvec_jiffies, | 296 | .proc_handler = proc_dointvec_jiffies, |
297 | .strategy = &sysctl_jiffies | 297 | .strategy = sysctl_jiffies |
298 | }, | 298 | }, |
299 | { | 299 | { |
300 | .ctl_name = NET_IPV4_TCP_KEEPALIVE_PROBES, | 300 | .ctl_name = NET_IPV4_TCP_KEEPALIVE_PROBES, |
@@ -302,7 +302,7 @@ static struct ctl_table ipv4_table[] = { | |||
302 | .data = &sysctl_tcp_keepalive_probes, | 302 | .data = &sysctl_tcp_keepalive_probes, |
303 | .maxlen = sizeof(int), | 303 | .maxlen = sizeof(int), |
304 | .mode = 0644, | 304 | .mode = 0644, |
305 | .proc_handler = &proc_dointvec | 305 | .proc_handler = proc_dointvec |
306 | }, | 306 | }, |
307 | { | 307 | { |
308 | .ctl_name = NET_IPV4_TCP_KEEPALIVE_INTVL, | 308 | .ctl_name = NET_IPV4_TCP_KEEPALIVE_INTVL, |
@@ -310,8 +310,8 @@ static struct ctl_table ipv4_table[] = { | |||
310 | .data = &sysctl_tcp_keepalive_intvl, | 310 | .data = &sysctl_tcp_keepalive_intvl, |
311 | .maxlen = sizeof(int), | 311 | .maxlen = sizeof(int), |
312 | .mode = 0644, | 312 | .mode = 0644, |
313 | .proc_handler = &proc_dointvec_jiffies, | 313 | .proc_handler = proc_dointvec_jiffies, |
314 | .strategy = &sysctl_jiffies | 314 | .strategy = sysctl_jiffies |
315 | }, | 315 | }, |
316 | { | 316 | { |
317 | .ctl_name = NET_IPV4_TCP_RETRIES1, | 317 | .ctl_name = NET_IPV4_TCP_RETRIES1, |
@@ -319,8 +319,8 @@ static struct ctl_table ipv4_table[] = { | |||
319 | .data = &sysctl_tcp_retries1, | 319 | .data = &sysctl_tcp_retries1, |
320 | .maxlen = sizeof(int), | 320 | .maxlen = sizeof(int), |
321 | .mode = 0644, | 321 | .mode = 0644, |
322 | .proc_handler = &proc_dointvec_minmax, | 322 | .proc_handler = proc_dointvec_minmax, |
323 | .strategy = &sysctl_intvec, | 323 | .strategy = sysctl_intvec, |
324 | .extra2 = &tcp_retr1_max | 324 | .extra2 = &tcp_retr1_max |
325 | }, | 325 | }, |
326 | { | 326 | { |
@@ -329,7 +329,7 @@ static struct ctl_table ipv4_table[] = { | |||
329 | .data = &sysctl_tcp_retries2, | 329 | .data = &sysctl_tcp_retries2, |
330 | .maxlen = sizeof(int), | 330 | .maxlen = sizeof(int), |
331 | .mode = 0644, | 331 | .mode = 0644, |
332 | .proc_handler = &proc_dointvec | 332 | .proc_handler = proc_dointvec |
333 | }, | 333 | }, |
334 | { | 334 | { |
335 | .ctl_name = NET_IPV4_TCP_FIN_TIMEOUT, | 335 | .ctl_name = NET_IPV4_TCP_FIN_TIMEOUT, |
@@ -337,8 +337,8 @@ static struct ctl_table ipv4_table[] = { | |||
337 | .data = &sysctl_tcp_fin_timeout, | 337 | .data = &sysctl_tcp_fin_timeout, |
338 | .maxlen = sizeof(int), | 338 | .maxlen = sizeof(int), |
339 | .mode = 0644, | 339 | .mode = 0644, |
340 | .proc_handler = &proc_dointvec_jiffies, | 340 | .proc_handler = proc_dointvec_jiffies, |
341 | .strategy = &sysctl_jiffies | 341 | .strategy = sysctl_jiffies |
342 | }, | 342 | }, |
343 | #ifdef CONFIG_SYN_COOKIES | 343 | #ifdef CONFIG_SYN_COOKIES |
344 | { | 344 | { |
@@ -347,7 +347,7 @@ static struct ctl_table ipv4_table[] = { | |||
347 | .data = &sysctl_tcp_syncookies, | 347 | .data = &sysctl_tcp_syncookies, |
348 | .maxlen = sizeof(int), | 348 | .maxlen = sizeof(int), |
349 | .mode = 0644, | 349 | .mode = 0644, |
350 | .proc_handler = &proc_dointvec | 350 | .proc_handler = proc_dointvec |
351 | }, | 351 | }, |
352 | #endif | 352 | #endif |
353 | { | 353 | { |
@@ -356,7 +356,7 @@ static struct ctl_table ipv4_table[] = { | |||
356 | .data = &tcp_death_row.sysctl_tw_recycle, | 356 | .data = &tcp_death_row.sysctl_tw_recycle, |
357 | .maxlen = sizeof(int), | 357 | .maxlen = sizeof(int), |
358 | .mode = 0644, | 358 | .mode = 0644, |
359 | .proc_handler = &proc_dointvec | 359 | .proc_handler = proc_dointvec |
360 | }, | 360 | }, |
361 | { | 361 | { |
362 | .ctl_name = NET_TCP_ABORT_ON_OVERFLOW, | 362 | .ctl_name = NET_TCP_ABORT_ON_OVERFLOW, |
@@ -364,7 +364,7 @@ static struct ctl_table ipv4_table[] = { | |||
364 | .data = &sysctl_tcp_abort_on_overflow, | 364 | .data = &sysctl_tcp_abort_on_overflow, |
365 | .maxlen = sizeof(int), | 365 | .maxlen = sizeof(int), |
366 | .mode = 0644, | 366 | .mode = 0644, |
367 | .proc_handler = &proc_dointvec | 367 | .proc_handler = proc_dointvec |
368 | }, | 368 | }, |
369 | { | 369 | { |
370 | .ctl_name = NET_TCP_STDURG, | 370 | .ctl_name = NET_TCP_STDURG, |
@@ -372,7 +372,7 @@ static struct ctl_table ipv4_table[] = { | |||
372 | .data = &sysctl_tcp_stdurg, | 372 | .data = &sysctl_tcp_stdurg, |
373 | .maxlen = sizeof(int), | 373 | .maxlen = sizeof(int), |
374 | .mode = 0644, | 374 | .mode = 0644, |
375 | .proc_handler = &proc_dointvec | 375 | .proc_handler = proc_dointvec |
376 | }, | 376 | }, |
377 | { | 377 | { |
378 | .ctl_name = NET_TCP_RFC1337, | 378 | .ctl_name = NET_TCP_RFC1337, |
@@ -380,7 +380,7 @@ static struct ctl_table ipv4_table[] = { | |||
380 | .data = &sysctl_tcp_rfc1337, | 380 | .data = &sysctl_tcp_rfc1337, |
381 | .maxlen = sizeof(int), | 381 | .maxlen = sizeof(int), |
382 | .mode = 0644, | 382 | .mode = 0644, |
383 | .proc_handler = &proc_dointvec | 383 | .proc_handler = proc_dointvec |
384 | }, | 384 | }, |
385 | { | 385 | { |
386 | .ctl_name = NET_TCP_MAX_SYN_BACKLOG, | 386 | .ctl_name = NET_TCP_MAX_SYN_BACKLOG, |
@@ -388,7 +388,7 @@ static struct ctl_table ipv4_table[] = { | |||
388 | .data = &sysctl_max_syn_backlog, | 388 | .data = &sysctl_max_syn_backlog, |
389 | .maxlen = sizeof(int), | 389 | .maxlen = sizeof(int), |
390 | .mode = 0644, | 390 | .mode = 0644, |
391 | .proc_handler = &proc_dointvec | 391 | .proc_handler = proc_dointvec |
392 | }, | 392 | }, |
393 | { | 393 | { |
394 | .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, | 394 | .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, |
@@ -396,8 +396,8 @@ static struct ctl_table ipv4_table[] = { | |||
396 | .data = &sysctl_local_ports.range, | 396 | .data = &sysctl_local_ports.range, |
397 | .maxlen = sizeof(sysctl_local_ports.range), | 397 | .maxlen = sizeof(sysctl_local_ports.range), |
398 | .mode = 0644, | 398 | .mode = 0644, |
399 | .proc_handler = &ipv4_local_port_range, | 399 | .proc_handler = ipv4_local_port_range, |
400 | .strategy = &ipv4_sysctl_local_port_range, | 400 | .strategy = ipv4_sysctl_local_port_range, |
401 | }, | 401 | }, |
402 | #ifdef CONFIG_IP_MULTICAST | 402 | #ifdef CONFIG_IP_MULTICAST |
403 | { | 403 | { |
@@ -406,7 +406,7 @@ static struct ctl_table ipv4_table[] = { | |||
406 | .data = &sysctl_igmp_max_memberships, | 406 | .data = &sysctl_igmp_max_memberships, |
407 | .maxlen = sizeof(int), | 407 | .maxlen = sizeof(int), |
408 | .mode = 0644, | 408 | .mode = 0644, |
409 | .proc_handler = &proc_dointvec | 409 | .proc_handler = proc_dointvec |
410 | }, | 410 | }, |
411 | 411 | ||
412 | #endif | 412 | #endif |
@@ -416,7 +416,7 @@ static struct ctl_table ipv4_table[] = { | |||
416 | .data = &sysctl_igmp_max_msf, | 416 | .data = &sysctl_igmp_max_msf, |
417 | .maxlen = sizeof(int), | 417 | .maxlen = sizeof(int), |
418 | .mode = 0644, | 418 | .mode = 0644, |
419 | .proc_handler = &proc_dointvec | 419 | .proc_handler = proc_dointvec |
420 | }, | 420 | }, |
421 | { | 421 | { |
422 | .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, | 422 | .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, |
@@ -424,7 +424,7 @@ static struct ctl_table ipv4_table[] = { | |||
424 | .data = &inet_peer_threshold, | 424 | .data = &inet_peer_threshold, |
425 | .maxlen = sizeof(int), | 425 | .maxlen = sizeof(int), |
426 | .mode = 0644, | 426 | .mode = 0644, |
427 | .proc_handler = &proc_dointvec | 427 | .proc_handler = proc_dointvec |
428 | }, | 428 | }, |
429 | { | 429 | { |
430 | .ctl_name = NET_IPV4_INET_PEER_MINTTL, | 430 | .ctl_name = NET_IPV4_INET_PEER_MINTTL, |
@@ -432,8 +432,8 @@ static struct ctl_table ipv4_table[] = { | |||
432 | .data = &inet_peer_minttl, | 432 | .data = &inet_peer_minttl, |
433 | .maxlen = sizeof(int), | 433 | .maxlen = sizeof(int), |
434 | .mode = 0644, | 434 | .mode = 0644, |
435 | .proc_handler = &proc_dointvec_jiffies, | 435 | .proc_handler = proc_dointvec_jiffies, |
436 | .strategy = &sysctl_jiffies | 436 | .strategy = sysctl_jiffies |
437 | }, | 437 | }, |
438 | { | 438 | { |
439 | .ctl_name = NET_IPV4_INET_PEER_MAXTTL, | 439 | .ctl_name = NET_IPV4_INET_PEER_MAXTTL, |
@@ -441,8 +441,8 @@ static struct ctl_table ipv4_table[] = { | |||
441 | .data = &inet_peer_maxttl, | 441 | .data = &inet_peer_maxttl, |
442 | .maxlen = sizeof(int), | 442 | .maxlen = sizeof(int), |
443 | .mode = 0644, | 443 | .mode = 0644, |
444 | .proc_handler = &proc_dointvec_jiffies, | 444 | .proc_handler = proc_dointvec_jiffies, |
445 | .strategy = &sysctl_jiffies | 445 | .strategy = sysctl_jiffies |
446 | }, | 446 | }, |
447 | { | 447 | { |
448 | .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, | 448 | .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, |
@@ -450,8 +450,8 @@ static struct ctl_table ipv4_table[] = { | |||
450 | .data = &inet_peer_gc_mintime, | 450 | .data = &inet_peer_gc_mintime, |
451 | .maxlen = sizeof(int), | 451 | .maxlen = sizeof(int), |
452 | .mode = 0644, | 452 | .mode = 0644, |
453 | .proc_handler = &proc_dointvec_jiffies, | 453 | .proc_handler = proc_dointvec_jiffies, |
454 | .strategy = &sysctl_jiffies | 454 | .strategy = sysctl_jiffies |
455 | }, | 455 | }, |
456 | { | 456 | { |
457 | .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, | 457 | .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, |
@@ -459,8 +459,8 @@ static struct ctl_table ipv4_table[] = { | |||
459 | .data = &inet_peer_gc_maxtime, | 459 | .data = &inet_peer_gc_maxtime, |
460 | .maxlen = sizeof(int), | 460 | .maxlen = sizeof(int), |
461 | .mode = 0644, | 461 | .mode = 0644, |
462 | .proc_handler = &proc_dointvec_jiffies, | 462 | .proc_handler = proc_dointvec_jiffies, |
463 | .strategy = &sysctl_jiffies | 463 | .strategy = sysctl_jiffies |
464 | }, | 464 | }, |
465 | { | 465 | { |
466 | .ctl_name = NET_TCP_ORPHAN_RETRIES, | 466 | .ctl_name = NET_TCP_ORPHAN_RETRIES, |
@@ -468,7 +468,7 @@ static struct ctl_table ipv4_table[] = { | |||
468 | .data = &sysctl_tcp_orphan_retries, | 468 | .data = &sysctl_tcp_orphan_retries, |
469 | .maxlen = sizeof(int), | 469 | .maxlen = sizeof(int), |
470 | .mode = 0644, | 470 | .mode = 0644, |
471 | .proc_handler = &proc_dointvec | 471 | .proc_handler = proc_dointvec |
472 | }, | 472 | }, |
473 | { | 473 | { |
474 | .ctl_name = NET_TCP_FACK, | 474 | .ctl_name = NET_TCP_FACK, |
@@ -476,7 +476,7 @@ static struct ctl_table ipv4_table[] = { | |||
476 | .data = &sysctl_tcp_fack, | 476 | .data = &sysctl_tcp_fack, |
477 | .maxlen = sizeof(int), | 477 | .maxlen = sizeof(int), |
478 | .mode = 0644, | 478 | .mode = 0644, |
479 | .proc_handler = &proc_dointvec | 479 | .proc_handler = proc_dointvec |
480 | }, | 480 | }, |
481 | { | 481 | { |
482 | .ctl_name = NET_TCP_REORDERING, | 482 | .ctl_name = NET_TCP_REORDERING, |
@@ -484,7 +484,7 @@ static struct ctl_table ipv4_table[] = { | |||
484 | .data = &sysctl_tcp_reordering, | 484 | .data = &sysctl_tcp_reordering, |
485 | .maxlen = sizeof(int), | 485 | .maxlen = sizeof(int), |
486 | .mode = 0644, | 486 | .mode = 0644, |
487 | .proc_handler = &proc_dointvec | 487 | .proc_handler = proc_dointvec |
488 | }, | 488 | }, |
489 | { | 489 | { |
490 | .ctl_name = NET_TCP_ECN, | 490 | .ctl_name = NET_TCP_ECN, |
@@ -492,7 +492,7 @@ static struct ctl_table ipv4_table[] = { | |||
492 | .data = &sysctl_tcp_ecn, | 492 | .data = &sysctl_tcp_ecn, |
493 | .maxlen = sizeof(int), | 493 | .maxlen = sizeof(int), |
494 | .mode = 0644, | 494 | .mode = 0644, |
495 | .proc_handler = &proc_dointvec | 495 | .proc_handler = proc_dointvec |
496 | }, | 496 | }, |
497 | { | 497 | { |
498 | .ctl_name = NET_TCP_DSACK, | 498 | .ctl_name = NET_TCP_DSACK, |
@@ -500,7 +500,7 @@ static struct ctl_table ipv4_table[] = { | |||
500 | .data = &sysctl_tcp_dsack, | 500 | .data = &sysctl_tcp_dsack, |
501 | .maxlen = sizeof(int), | 501 | .maxlen = sizeof(int), |
502 | .mode = 0644, | 502 | .mode = 0644, |
503 | .proc_handler = &proc_dointvec | 503 | .proc_handler = proc_dointvec |
504 | }, | 504 | }, |
505 | { | 505 | { |
506 | .ctl_name = NET_TCP_MEM, | 506 | .ctl_name = NET_TCP_MEM, |
@@ -508,7 +508,7 @@ static struct ctl_table ipv4_table[] = { | |||
508 | .data = &sysctl_tcp_mem, | 508 | .data = &sysctl_tcp_mem, |
509 | .maxlen = sizeof(sysctl_tcp_mem), | 509 | .maxlen = sizeof(sysctl_tcp_mem), |
510 | .mode = 0644, | 510 | .mode = 0644, |
511 | .proc_handler = &proc_dointvec | 511 | .proc_handler = proc_dointvec |
512 | }, | 512 | }, |
513 | { | 513 | { |
514 | .ctl_name = NET_TCP_WMEM, | 514 | .ctl_name = NET_TCP_WMEM, |
@@ -516,7 +516,7 @@ static struct ctl_table ipv4_table[] = { | |||
516 | .data = &sysctl_tcp_wmem, | 516 | .data = &sysctl_tcp_wmem, |
517 | .maxlen = sizeof(sysctl_tcp_wmem), | 517 | .maxlen = sizeof(sysctl_tcp_wmem), |
518 | .mode = 0644, | 518 | .mode = 0644, |
519 | .proc_handler = &proc_dointvec | 519 | .proc_handler = proc_dointvec |
520 | }, | 520 | }, |
521 | { | 521 | { |
522 | .ctl_name = NET_TCP_RMEM, | 522 | .ctl_name = NET_TCP_RMEM, |
@@ -524,7 +524,7 @@ static struct ctl_table ipv4_table[] = { | |||
524 | .data = &sysctl_tcp_rmem, | 524 | .data = &sysctl_tcp_rmem, |
525 | .maxlen = sizeof(sysctl_tcp_rmem), | 525 | .maxlen = sizeof(sysctl_tcp_rmem), |
526 | .mode = 0644, | 526 | .mode = 0644, |
527 | .proc_handler = &proc_dointvec | 527 | .proc_handler = proc_dointvec |
528 | }, | 528 | }, |
529 | { | 529 | { |
530 | .ctl_name = NET_TCP_APP_WIN, | 530 | .ctl_name = NET_TCP_APP_WIN, |
@@ -532,7 +532,7 @@ static struct ctl_table ipv4_table[] = { | |||
532 | .data = &sysctl_tcp_app_win, | 532 | .data = &sysctl_tcp_app_win, |
533 | .maxlen = sizeof(int), | 533 | .maxlen = sizeof(int), |
534 | .mode = 0644, | 534 | .mode = 0644, |
535 | .proc_handler = &proc_dointvec | 535 | .proc_handler = proc_dointvec |
536 | }, | 536 | }, |
537 | { | 537 | { |
538 | .ctl_name = NET_TCP_ADV_WIN_SCALE, | 538 | .ctl_name = NET_TCP_ADV_WIN_SCALE, |
@@ -540,7 +540,7 @@ static struct ctl_table ipv4_table[] = { | |||
540 | .data = &sysctl_tcp_adv_win_scale, | 540 | .data = &sysctl_tcp_adv_win_scale, |
541 | .maxlen = sizeof(int), | 541 | .maxlen = sizeof(int), |
542 | .mode = 0644, | 542 | .mode = 0644, |
543 | .proc_handler = &proc_dointvec | 543 | .proc_handler = proc_dointvec |
544 | }, | 544 | }, |
545 | { | 545 | { |
546 | .ctl_name = NET_TCP_TW_REUSE, | 546 | .ctl_name = NET_TCP_TW_REUSE, |
@@ -548,7 +548,7 @@ static struct ctl_table ipv4_table[] = { | |||
548 | .data = &sysctl_tcp_tw_reuse, | 548 | .data = &sysctl_tcp_tw_reuse, |
549 | .maxlen = sizeof(int), | 549 | .maxlen = sizeof(int), |
550 | .mode = 0644, | 550 | .mode = 0644, |
551 | .proc_handler = &proc_dointvec | 551 | .proc_handler = proc_dointvec |
552 | }, | 552 | }, |
553 | { | 553 | { |
554 | .ctl_name = NET_TCP_FRTO, | 554 | .ctl_name = NET_TCP_FRTO, |
@@ -556,7 +556,7 @@ static struct ctl_table ipv4_table[] = { | |||
556 | .data = &sysctl_tcp_frto, | 556 | .data = &sysctl_tcp_frto, |
557 | .maxlen = sizeof(int), | 557 | .maxlen = sizeof(int), |
558 | .mode = 0644, | 558 | .mode = 0644, |
559 | .proc_handler = &proc_dointvec | 559 | .proc_handler = proc_dointvec |
560 | }, | 560 | }, |
561 | { | 561 | { |
562 | .ctl_name = NET_TCP_FRTO_RESPONSE, | 562 | .ctl_name = NET_TCP_FRTO_RESPONSE, |
@@ -564,7 +564,7 @@ static struct ctl_table ipv4_table[] = { | |||
564 | .data = &sysctl_tcp_frto_response, | 564 | .data = &sysctl_tcp_frto_response, |
565 | .maxlen = sizeof(int), | 565 | .maxlen = sizeof(int), |
566 | .mode = 0644, | 566 | .mode = 0644, |
567 | .proc_handler = &proc_dointvec | 567 | .proc_handler = proc_dointvec |
568 | }, | 568 | }, |
569 | { | 569 | { |
570 | .ctl_name = NET_TCP_LOW_LATENCY, | 570 | .ctl_name = NET_TCP_LOW_LATENCY, |
@@ -572,7 +572,7 @@ static struct ctl_table ipv4_table[] = { | |||
572 | .data = &sysctl_tcp_low_latency, | 572 | .data = &sysctl_tcp_low_latency, |
573 | .maxlen = sizeof(int), | 573 | .maxlen = sizeof(int), |
574 | .mode = 0644, | 574 | .mode = 0644, |
575 | .proc_handler = &proc_dointvec | 575 | .proc_handler = proc_dointvec |
576 | }, | 576 | }, |
577 | { | 577 | { |
578 | .ctl_name = NET_TCP_NO_METRICS_SAVE, | 578 | .ctl_name = NET_TCP_NO_METRICS_SAVE, |
@@ -580,7 +580,7 @@ static struct ctl_table ipv4_table[] = { | |||
580 | .data = &sysctl_tcp_nometrics_save, | 580 | .data = &sysctl_tcp_nometrics_save, |
581 | .maxlen = sizeof(int), | 581 | .maxlen = sizeof(int), |
582 | .mode = 0644, | 582 | .mode = 0644, |
583 | .proc_handler = &proc_dointvec, | 583 | .proc_handler = proc_dointvec, |
584 | }, | 584 | }, |
585 | { | 585 | { |
586 | .ctl_name = NET_TCP_MODERATE_RCVBUF, | 586 | .ctl_name = NET_TCP_MODERATE_RCVBUF, |
@@ -588,7 +588,7 @@ static struct ctl_table ipv4_table[] = { | |||
588 | .data = &sysctl_tcp_moderate_rcvbuf, | 588 | .data = &sysctl_tcp_moderate_rcvbuf, |
589 | .maxlen = sizeof(int), | 589 | .maxlen = sizeof(int), |
590 | .mode = 0644, | 590 | .mode = 0644, |
591 | .proc_handler = &proc_dointvec, | 591 | .proc_handler = proc_dointvec, |
592 | }, | 592 | }, |
593 | { | 593 | { |
594 | .ctl_name = NET_TCP_TSO_WIN_DIVISOR, | 594 | .ctl_name = NET_TCP_TSO_WIN_DIVISOR, |
@@ -596,15 +596,15 @@ static struct ctl_table ipv4_table[] = { | |||
596 | .data = &sysctl_tcp_tso_win_divisor, | 596 | .data = &sysctl_tcp_tso_win_divisor, |
597 | .maxlen = sizeof(int), | 597 | .maxlen = sizeof(int), |
598 | .mode = 0644, | 598 | .mode = 0644, |
599 | .proc_handler = &proc_dointvec, | 599 | .proc_handler = proc_dointvec, |
600 | }, | 600 | }, |
601 | { | 601 | { |
602 | .ctl_name = NET_TCP_CONG_CONTROL, | 602 | .ctl_name = NET_TCP_CONG_CONTROL, |
603 | .procname = "tcp_congestion_control", | 603 | .procname = "tcp_congestion_control", |
604 | .mode = 0644, | 604 | .mode = 0644, |
605 | .maxlen = TCP_CA_NAME_MAX, | 605 | .maxlen = TCP_CA_NAME_MAX, |
606 | .proc_handler = &proc_tcp_congestion_control, | 606 | .proc_handler = proc_tcp_congestion_control, |
607 | .strategy = &sysctl_tcp_congestion_control, | 607 | .strategy = sysctl_tcp_congestion_control, |
608 | }, | 608 | }, |
609 | { | 609 | { |
610 | .ctl_name = NET_TCP_ABC, | 610 | .ctl_name = NET_TCP_ABC, |
@@ -612,7 +612,7 @@ static struct ctl_table ipv4_table[] = { | |||
612 | .data = &sysctl_tcp_abc, | 612 | .data = &sysctl_tcp_abc, |
613 | .maxlen = sizeof(int), | 613 | .maxlen = sizeof(int), |
614 | .mode = 0644, | 614 | .mode = 0644, |
615 | .proc_handler = &proc_dointvec, | 615 | .proc_handler = proc_dointvec, |
616 | }, | 616 | }, |
617 | { | 617 | { |
618 | .ctl_name = NET_TCP_MTU_PROBING, | 618 | .ctl_name = NET_TCP_MTU_PROBING, |
@@ -620,7 +620,7 @@ static struct ctl_table ipv4_table[] = { | |||
620 | .data = &sysctl_tcp_mtu_probing, | 620 | .data = &sysctl_tcp_mtu_probing, |
621 | .maxlen = sizeof(int), | 621 | .maxlen = sizeof(int), |
622 | .mode = 0644, | 622 | .mode = 0644, |
623 | .proc_handler = &proc_dointvec, | 623 | .proc_handler = proc_dointvec, |
624 | }, | 624 | }, |
625 | { | 625 | { |
626 | .ctl_name = NET_TCP_BASE_MSS, | 626 | .ctl_name = NET_TCP_BASE_MSS, |
@@ -628,7 +628,7 @@ static struct ctl_table ipv4_table[] = { | |||
628 | .data = &sysctl_tcp_base_mss, | 628 | .data = &sysctl_tcp_base_mss, |
629 | .maxlen = sizeof(int), | 629 | .maxlen = sizeof(int), |
630 | .mode = 0644, | 630 | .mode = 0644, |
631 | .proc_handler = &proc_dointvec, | 631 | .proc_handler = proc_dointvec, |
632 | }, | 632 | }, |
633 | { | 633 | { |
634 | .ctl_name = NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, | 634 | .ctl_name = NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, |
@@ -636,7 +636,7 @@ static struct ctl_table ipv4_table[] = { | |||
636 | .data = &sysctl_tcp_workaround_signed_windows, | 636 | .data = &sysctl_tcp_workaround_signed_windows, |
637 | .maxlen = sizeof(int), | 637 | .maxlen = sizeof(int), |
638 | .mode = 0644, | 638 | .mode = 0644, |
639 | .proc_handler = &proc_dointvec | 639 | .proc_handler = proc_dointvec |
640 | }, | 640 | }, |
641 | #ifdef CONFIG_NET_DMA | 641 | #ifdef CONFIG_NET_DMA |
642 | { | 642 | { |
@@ -645,7 +645,7 @@ static struct ctl_table ipv4_table[] = { | |||
645 | .data = &sysctl_tcp_dma_copybreak, | 645 | .data = &sysctl_tcp_dma_copybreak, |
646 | .maxlen = sizeof(int), | 646 | .maxlen = sizeof(int), |
647 | .mode = 0644, | 647 | .mode = 0644, |
648 | .proc_handler = &proc_dointvec | 648 | .proc_handler = proc_dointvec |
649 | }, | 649 | }, |
650 | #endif | 650 | #endif |
651 | { | 651 | { |
@@ -654,7 +654,7 @@ static struct ctl_table ipv4_table[] = { | |||
654 | .data = &sysctl_tcp_slow_start_after_idle, | 654 | .data = &sysctl_tcp_slow_start_after_idle, |
655 | .maxlen = sizeof(int), | 655 | .maxlen = sizeof(int), |
656 | .mode = 0644, | 656 | .mode = 0644, |
657 | .proc_handler = &proc_dointvec | 657 | .proc_handler = proc_dointvec |
658 | }, | 658 | }, |
659 | #ifdef CONFIG_NETLABEL | 659 | #ifdef CONFIG_NETLABEL |
660 | { | 660 | { |
@@ -663,7 +663,7 @@ static struct ctl_table ipv4_table[] = { | |||
663 | .data = &cipso_v4_cache_enabled, | 663 | .data = &cipso_v4_cache_enabled, |
664 | .maxlen = sizeof(int), | 664 | .maxlen = sizeof(int), |
665 | .mode = 0644, | 665 | .mode = 0644, |
666 | .proc_handler = &proc_dointvec, | 666 | .proc_handler = proc_dointvec, |
667 | }, | 667 | }, |
668 | { | 668 | { |
669 | .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, | 669 | .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, |
@@ -671,7 +671,7 @@ static struct ctl_table ipv4_table[] = { | |||
671 | .data = &cipso_v4_cache_bucketsize, | 671 | .data = &cipso_v4_cache_bucketsize, |
672 | .maxlen = sizeof(int), | 672 | .maxlen = sizeof(int), |
673 | .mode = 0644, | 673 | .mode = 0644, |
674 | .proc_handler = &proc_dointvec, | 674 | .proc_handler = proc_dointvec, |
675 | }, | 675 | }, |
676 | { | 676 | { |
677 | .ctl_name = NET_CIPSOV4_RBM_OPTFMT, | 677 | .ctl_name = NET_CIPSOV4_RBM_OPTFMT, |
@@ -679,7 +679,7 @@ static struct ctl_table ipv4_table[] = { | |||
679 | .data = &cipso_v4_rbm_optfmt, | 679 | .data = &cipso_v4_rbm_optfmt, |
680 | .maxlen = sizeof(int), | 680 | .maxlen = sizeof(int), |
681 | .mode = 0644, | 681 | .mode = 0644, |
682 | .proc_handler = &proc_dointvec, | 682 | .proc_handler = proc_dointvec, |
683 | }, | 683 | }, |
684 | { | 684 | { |
685 | .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, | 685 | .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, |
@@ -687,22 +687,22 @@ static struct ctl_table ipv4_table[] = { | |||
687 | .data = &cipso_v4_rbm_strictvalid, | 687 | .data = &cipso_v4_rbm_strictvalid, |
688 | .maxlen = sizeof(int), | 688 | .maxlen = sizeof(int), |
689 | .mode = 0644, | 689 | .mode = 0644, |
690 | .proc_handler = &proc_dointvec, | 690 | .proc_handler = proc_dointvec, |
691 | }, | 691 | }, |
692 | #endif /* CONFIG_NETLABEL */ | 692 | #endif /* CONFIG_NETLABEL */ |
693 | { | 693 | { |
694 | .procname = "tcp_available_congestion_control", | 694 | .procname = "tcp_available_congestion_control", |
695 | .maxlen = TCP_CA_BUF_MAX, | 695 | .maxlen = TCP_CA_BUF_MAX, |
696 | .mode = 0444, | 696 | .mode = 0444, |
697 | .proc_handler = &proc_tcp_available_congestion_control, | 697 | .proc_handler = proc_tcp_available_congestion_control, |
698 | }, | 698 | }, |
699 | { | 699 | { |
700 | .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL, | 700 | .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL, |
701 | .procname = "tcp_allowed_congestion_control", | 701 | .procname = "tcp_allowed_congestion_control", |
702 | .maxlen = TCP_CA_BUF_MAX, | 702 | .maxlen = TCP_CA_BUF_MAX, |
703 | .mode = 0644, | 703 | .mode = 0644, |
704 | .proc_handler = &proc_allowed_congestion_control, | 704 | .proc_handler = proc_allowed_congestion_control, |
705 | .strategy = &strategy_allowed_congestion_control, | 705 | .strategy = strategy_allowed_congestion_control, |
706 | }, | 706 | }, |
707 | { | 707 | { |
708 | .ctl_name = NET_TCP_MAX_SSTHRESH, | 708 | .ctl_name = NET_TCP_MAX_SSTHRESH, |
@@ -710,7 +710,7 @@ static struct ctl_table ipv4_table[] = { | |||
710 | .data = &sysctl_tcp_max_ssthresh, | 710 | .data = &sysctl_tcp_max_ssthresh, |
711 | .maxlen = sizeof(int), | 711 | .maxlen = sizeof(int), |
712 | .mode = 0644, | 712 | .mode = 0644, |
713 | .proc_handler = &proc_dointvec, | 713 | .proc_handler = proc_dointvec, |
714 | }, | 714 | }, |
715 | { | 715 | { |
716 | .ctl_name = CTL_UNNUMBERED, | 716 | .ctl_name = CTL_UNNUMBERED, |
@@ -718,8 +718,8 @@ static struct ctl_table ipv4_table[] = { | |||
718 | .data = &sysctl_udp_mem, | 718 | .data = &sysctl_udp_mem, |
719 | .maxlen = sizeof(sysctl_udp_mem), | 719 | .maxlen = sizeof(sysctl_udp_mem), |
720 | .mode = 0644, | 720 | .mode = 0644, |
721 | .proc_handler = &proc_dointvec_minmax, | 721 | .proc_handler = proc_dointvec_minmax, |
722 | .strategy = &sysctl_intvec, | 722 | .strategy = sysctl_intvec, |
723 | .extra1 = &zero | 723 | .extra1 = &zero |
724 | }, | 724 | }, |
725 | { | 725 | { |
@@ -728,8 +728,8 @@ static struct ctl_table ipv4_table[] = { | |||
728 | .data = &sysctl_udp_rmem_min, | 728 | .data = &sysctl_udp_rmem_min, |
729 | .maxlen = sizeof(sysctl_udp_rmem_min), | 729 | .maxlen = sizeof(sysctl_udp_rmem_min), |
730 | .mode = 0644, | 730 | .mode = 0644, |
731 | .proc_handler = &proc_dointvec_minmax, | 731 | .proc_handler = proc_dointvec_minmax, |
732 | .strategy = &sysctl_intvec, | 732 | .strategy = sysctl_intvec, |
733 | .extra1 = &zero | 733 | .extra1 = &zero |
734 | }, | 734 | }, |
735 | { | 735 | { |
@@ -738,8 +738,8 @@ static struct ctl_table ipv4_table[] = { | |||
738 | .data = &sysctl_udp_wmem_min, | 738 | .data = &sysctl_udp_wmem_min, |
739 | .maxlen = sizeof(sysctl_udp_wmem_min), | 739 | .maxlen = sizeof(sysctl_udp_wmem_min), |
740 | .mode = 0644, | 740 | .mode = 0644, |
741 | .proc_handler = &proc_dointvec_minmax, | 741 | .proc_handler = proc_dointvec_minmax, |
742 | .strategy = &sysctl_intvec, | 742 | .strategy = sysctl_intvec, |
743 | .extra1 = &zero | 743 | .extra1 = &zero |
744 | }, | 744 | }, |
745 | { .ctl_name = 0 } | 745 | { .ctl_name = 0 } |
@@ -752,7 +752,7 @@ static struct ctl_table ipv4_net_table[] = { | |||
752 | .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, | 752 | .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, |
753 | .maxlen = sizeof(int), | 753 | .maxlen = sizeof(int), |
754 | .mode = 0644, | 754 | .mode = 0644, |
755 | .proc_handler = &proc_dointvec | 755 | .proc_handler = proc_dointvec |
756 | }, | 756 | }, |
757 | { | 757 | { |
758 | .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, | 758 | .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, |
@@ -760,7 +760,7 @@ static struct ctl_table ipv4_net_table[] = { | |||
760 | .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, | 760 | .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, |
761 | .maxlen = sizeof(int), | 761 | .maxlen = sizeof(int), |
762 | .mode = 0644, | 762 | .mode = 0644, |
763 | .proc_handler = &proc_dointvec | 763 | .proc_handler = proc_dointvec |
764 | }, | 764 | }, |
765 | { | 765 | { |
766 | .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, | 766 | .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, |
@@ -768,7 +768,7 @@ static struct ctl_table ipv4_net_table[] = { | |||
768 | .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, | 768 | .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, |
769 | .maxlen = sizeof(int), | 769 | .maxlen = sizeof(int), |
770 | .mode = 0644, | 770 | .mode = 0644, |
771 | .proc_handler = &proc_dointvec | 771 | .proc_handler = proc_dointvec |
772 | }, | 772 | }, |
773 | { | 773 | { |
774 | .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, | 774 | .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, |
@@ -776,7 +776,7 @@ static struct ctl_table ipv4_net_table[] = { | |||
776 | .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, | 776 | .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, |
777 | .maxlen = sizeof(int), | 777 | .maxlen = sizeof(int), |
778 | .mode = 0644, | 778 | .mode = 0644, |
779 | .proc_handler = &proc_dointvec | 779 | .proc_handler = proc_dointvec |
780 | }, | 780 | }, |
781 | { | 781 | { |
782 | .ctl_name = NET_IPV4_ICMP_RATELIMIT, | 782 | .ctl_name = NET_IPV4_ICMP_RATELIMIT, |
@@ -784,8 +784,8 @@ static struct ctl_table ipv4_net_table[] = { | |||
784 | .data = &init_net.ipv4.sysctl_icmp_ratelimit, | 784 | .data = &init_net.ipv4.sysctl_icmp_ratelimit, |
785 | .maxlen = sizeof(int), | 785 | .maxlen = sizeof(int), |
786 | .mode = 0644, | 786 | .mode = 0644, |
787 | .proc_handler = &proc_dointvec_ms_jiffies, | 787 | .proc_handler = proc_dointvec_ms_jiffies, |
788 | .strategy = &sysctl_ms_jiffies | 788 | .strategy = sysctl_ms_jiffies |
789 | }, | 789 | }, |
790 | { | 790 | { |
791 | .ctl_name = NET_IPV4_ICMP_RATEMASK, | 791 | .ctl_name = NET_IPV4_ICMP_RATEMASK, |
@@ -793,7 +793,15 @@ static struct ctl_table ipv4_net_table[] = { | |||
793 | .data = &init_net.ipv4.sysctl_icmp_ratemask, | 793 | .data = &init_net.ipv4.sysctl_icmp_ratemask, |
794 | .maxlen = sizeof(int), | 794 | .maxlen = sizeof(int), |
795 | .mode = 0644, | 795 | .mode = 0644, |
796 | .proc_handler = &proc_dointvec | 796 | .proc_handler = proc_dointvec |
797 | }, | ||
798 | { | ||
799 | .ctl_name = CTL_UNNUMBERED, | ||
800 | .procname = "rt_cache_rebuild_count", | ||
801 | .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, | ||
802 | .maxlen = sizeof(int), | ||
803 | .mode = 0644, | ||
804 | .proc_handler = proc_dointvec | ||
797 | }, | 805 | }, |
798 | { } | 806 | { } |
799 | }; | 807 | }; |
@@ -827,8 +835,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
827 | &net->ipv4.sysctl_icmp_ratelimit; | 835 | &net->ipv4.sysctl_icmp_ratelimit; |
828 | table[5].data = | 836 | table[5].data = |
829 | &net->ipv4.sysctl_icmp_ratemask; | 837 | &net->ipv4.sysctl_icmp_ratemask; |
838 | table[6].data = | ||
839 | &net->ipv4.sysctl_rt_cache_rebuild_count; | ||
830 | } | 840 | } |
831 | 841 | ||
842 | net->ipv4.sysctl_rt_cache_rebuild_count = 4; | ||
843 | |||
832 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, | 844 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, |
833 | net_ipv4_ctl_path, table); | 845 | net_ipv4_ctl_path, table); |
834 | if (net->ipv4.ipv4_hdr == NULL) | 846 | if (net->ipv4.ipv4_hdr == NULL) |
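The hunks above add a per-namespace "rt_cache_rebuild_count" sysctl (entry index 6 in ipv4_net_table), repoint its .data member when the table is duplicated for a non-init namespace, and default the counter to 4. A hedged sketch of that duplicate-and-rewire pattern; the demo_* names are illustrative, not the kernel's own helpers:

    #include <linux/slab.h>
    #include <linux/string.h>
    #include <linux/sysctl.h>

    struct demo_net { int rt_cache_rebuild_count; };

    static struct ctl_table demo_template[] = {
            {
                    .ctl_name     = CTL_UNNUMBERED,
                    .procname     = "rt_cache_rebuild_count",
                    .maxlen       = sizeof(int),
                    .mode         = 0644,
                    .proc_handler = proc_dointvec
            },
            { }
    };

    static struct ctl_table *demo_clone(struct demo_net *net)
    {
            struct ctl_table *t = kmemdup(demo_template, sizeof(demo_template),
                                          GFP_KERNEL);

            /* Each clone's .data must point into its own namespace's
             * storage, which is what the table[6].data assignment
             * above does for non-init namespaces. */
            if (t)
                    t[0].data = &net->rt_cache_rebuild_count;
            return t;
    }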
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c5aca0bb116a..1f3d52946b3b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -277,8 +277,7 @@ | |||
277 | 277 | ||
278 | int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; | 278 | int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; |
279 | 279 | ||
280 | atomic_t tcp_orphan_count = ATOMIC_INIT(0); | 280 | struct percpu_counter tcp_orphan_count; |
281 | |||
282 | EXPORT_SYMBOL_GPL(tcp_orphan_count); | 281 | EXPORT_SYMBOL_GPL(tcp_orphan_count); |
283 | 282 | ||
284 | int sysctl_tcp_mem[3] __read_mostly; | 283 | int sysctl_tcp_mem[3] __read_mostly; |
@@ -290,9 +289,12 @@ EXPORT_SYMBOL(sysctl_tcp_rmem); | |||
290 | EXPORT_SYMBOL(sysctl_tcp_wmem); | 289 | EXPORT_SYMBOL(sysctl_tcp_wmem); |
291 | 290 | ||
292 | atomic_t tcp_memory_allocated; /* Current allocated memory. */ | 291 | atomic_t tcp_memory_allocated; /* Current allocated memory. */ |
293 | atomic_t tcp_sockets_allocated; /* Current number of TCP sockets. */ | ||
294 | |||
295 | EXPORT_SYMBOL(tcp_memory_allocated); | 292 | EXPORT_SYMBOL(tcp_memory_allocated); |
293 | |||
294 | /* | ||
295 | * Current number of TCP sockets. | ||
296 | */ | ||
297 | struct percpu_counter tcp_sockets_allocated; | ||
296 | EXPORT_SYMBOL(tcp_sockets_allocated); | 298 | EXPORT_SYMBOL(tcp_sockets_allocated); |
297 | 299 | ||
298 | /* | 300 | /* |
@@ -1680,7 +1682,7 @@ void tcp_set_state(struct sock *sk, int state) | |||
1680 | inet_put_port(sk); | 1682 | inet_put_port(sk); |
1681 | /* fall through */ | 1683 | /* fall through */ |
1682 | default: | 1684 | default: |
1683 | if (oldstate==TCP_ESTABLISHED) | 1685 | if (oldstate == TCP_ESTABLISHED) |
1684 | TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); | 1686 | TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); |
1685 | } | 1687 | } |
1686 | 1688 | ||
@@ -1690,7 +1692,7 @@ void tcp_set_state(struct sock *sk, int state) | |||
1690 | sk->sk_state = state; | 1692 | sk->sk_state = state; |
1691 | 1693 | ||
1692 | #ifdef STATE_TRACE | 1694 | #ifdef STATE_TRACE |
1693 | SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]); | 1695 | SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); |
1694 | #endif | 1696 | #endif |
1695 | } | 1697 | } |
1696 | EXPORT_SYMBOL_GPL(tcp_set_state); | 1698 | EXPORT_SYMBOL_GPL(tcp_set_state); |
@@ -1834,7 +1836,7 @@ adjudge_to_death: | |||
1834 | state = sk->sk_state; | 1836 | state = sk->sk_state; |
1835 | sock_hold(sk); | 1837 | sock_hold(sk); |
1836 | sock_orphan(sk); | 1838 | sock_orphan(sk); |
1837 | atomic_inc(sk->sk_prot->orphan_count); | 1839 | percpu_counter_inc(sk->sk_prot->orphan_count); |
1838 | 1840 | ||
1839 | /* It is the last release_sock in its life. It will remove backlog. */ | 1841 | /* It is the last release_sock in its life. It will remove backlog. */ |
1840 | release_sock(sk); | 1842 | release_sock(sk); |
@@ -1885,9 +1887,11 @@ adjudge_to_death: | |||
1885 | } | 1887 | } |
1886 | } | 1888 | } |
1887 | if (sk->sk_state != TCP_CLOSE) { | 1889 | if (sk->sk_state != TCP_CLOSE) { |
1890 | int orphan_count = percpu_counter_read_positive( | ||
1891 | sk->sk_prot->orphan_count); | ||
1892 | |||
1888 | sk_mem_reclaim(sk); | 1893 | sk_mem_reclaim(sk); |
1889 | if (tcp_too_many_orphans(sk, | 1894 | if (tcp_too_many_orphans(sk, orphan_count)) { |
1890 | atomic_read(sk->sk_prot->orphan_count))) { | ||
1891 | if (net_ratelimit()) | 1895 | if (net_ratelimit()) |
1892 | printk(KERN_INFO "TCP: too many orphaned " | 1896 | printk(KERN_INFO "TCP: too many orphaned " |
1893 | "sockets\n"); | 1897 | "sockets\n"); |
@@ -2461,6 +2465,106 @@ out: | |||
2461 | } | 2465 | } |
2462 | EXPORT_SYMBOL(tcp_tso_segment); | 2466 | EXPORT_SYMBOL(tcp_tso_segment); |
2463 | 2467 | ||
2468 | struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) | ||
2469 | { | ||
2470 | struct sk_buff **pp = NULL; | ||
2471 | struct sk_buff *p; | ||
2472 | struct tcphdr *th; | ||
2473 | struct tcphdr *th2; | ||
2474 | unsigned int thlen; | ||
2475 | unsigned int flags; | ||
2476 | unsigned int total; | ||
2477 | unsigned int mss = 1; | ||
2478 | int flush = 1; | ||
2479 | |||
2480 | if (!pskb_may_pull(skb, sizeof(*th))) | ||
2481 | goto out; | ||
2482 | |||
2483 | th = tcp_hdr(skb); | ||
2484 | thlen = th->doff * 4; | ||
2485 | if (thlen < sizeof(*th)) | ||
2486 | goto out; | ||
2487 | |||
2488 | if (!pskb_may_pull(skb, thlen)) | ||
2489 | goto out; | ||
2490 | |||
2491 | th = tcp_hdr(skb); | ||
2492 | __skb_pull(skb, thlen); | ||
2493 | |||
2494 | flags = tcp_flag_word(th); | ||
2495 | |||
2496 | for (; (p = *head); head = &p->next) { | ||
2497 | if (!NAPI_GRO_CB(p)->same_flow) | ||
2498 | continue; | ||
2499 | |||
2500 | th2 = tcp_hdr(p); | ||
2501 | |||
2502 | if (th->source != th2->source || th->dest != th2->dest) { | ||
2503 | NAPI_GRO_CB(p)->same_flow = 0; | ||
2504 | continue; | ||
2505 | } | ||
2506 | |||
2507 | goto found; | ||
2508 | } | ||
2509 | |||
2510 | goto out_check_final; | ||
2511 | |||
2512 | found: | ||
2513 | flush = NAPI_GRO_CB(p)->flush; | ||
2514 | flush |= flags & TCP_FLAG_CWR; | ||
2515 | flush |= (flags ^ tcp_flag_word(th2)) & | ||
2516 | ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); | ||
2517 | flush |= th->ack_seq != th2->ack_seq || th->window != th2->window; | ||
2518 | flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); | ||
2519 | |||
2520 | total = p->len; | ||
2521 | mss = total; | ||
2522 | if (skb_shinfo(p)->frag_list) | ||
2523 | mss = skb_shinfo(p)->frag_list->len; | ||
2524 | |||
2525 | flush |= skb->len > mss || skb->len <= 0; | ||
2526 | flush |= ntohl(th2->seq) + total != ntohl(th->seq); | ||
2527 | |||
2528 | if (flush || skb_gro_receive(head, skb)) { | ||
2529 | mss = 1; | ||
2530 | goto out_check_final; | ||
2531 | } | ||
2532 | |||
2533 | p = *head; | ||
2534 | th2 = tcp_hdr(p); | ||
2535 | tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); | ||
2536 | |||
2537 | out_check_final: | ||
2538 | flush = skb->len < mss; | ||
2539 | flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | | ||
2540 | TCP_FLAG_SYN | TCP_FLAG_FIN); | ||
2541 | |||
2542 | if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) | ||
2543 | pp = head; | ||
2544 | |||
2545 | out: | ||
2546 | NAPI_GRO_CB(skb)->flush |= flush; | ||
2547 | |||
2548 | return pp; | ||
2549 | } | ||
2550 | |||
2551 | int tcp_gro_complete(struct sk_buff *skb) | ||
2552 | { | ||
2553 | struct tcphdr *th = tcp_hdr(skb); | ||
2554 | |||
2555 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
2556 | skb->csum_offset = offsetof(struct tcphdr, check); | ||
2557 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
2558 | |||
2559 | skb_shinfo(skb)->gso_size = skb_shinfo(skb)->frag_list->len; | ||
2560 | skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; | ||
2561 | |||
2562 | if (th->cwr) | ||
2563 | skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; | ||
2564 | |||
2565 | return 0; | ||
2566 | } | ||
2567 | |||
2464 | #ifdef CONFIG_TCP_MD5SIG | 2568 | #ifdef CONFIG_TCP_MD5SIG |
2465 | static unsigned long tcp_md5sig_users; | 2569 | static unsigned long tcp_md5sig_users; |
2466 | static struct tcp_md5sig_pool **tcp_md5sig_pool; | 2570 | static struct tcp_md5sig_pool **tcp_md5sig_pool; |
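tcp_gro_receive() above only coalesces a segment with a held packet when the ports match, the ACK and window fields agree, the options are identical, and the sequence number continues the previous data, setting the flush bit otherwise; tcp_gro_complete() then rebuilds the GSO metadata when the merged packet is finally delivered. These paths are reached when a NAPI driver feeds frames through the GRO entry points. A hedged sketch of that driver side, where demo_fetch_rx() stands in for hypothetical hardware-specific receive code:

    #include <linux/etherdevice.h>
    #include <linux/netdevice.h>

    static struct sk_buff *demo_fetch_rx(void); /* hypothetical HW glue */

    static int demo_poll(struct napi_struct *napi, int budget)
    {
            struct sk_buff *skb;
            int done = 0;

            while (done < budget && (skb = demo_fetch_rx()) != NULL) {
                    skb->protocol = eth_type_trans(skb, napi->dev);
                    /* May merge into a held packet via inet_gro_receive()
                     * and tcp_gro_receive() instead of delivering now. */
                    napi_gro_receive(napi, skb);
                    done++;
            }
            if (done < budget)
                    napi_complete(napi);
            return done;
    }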
@@ -2650,7 +2754,7 @@ EXPORT_SYMBOL(tcp_md5_hash_key); | |||
2650 | 2754 | ||
2651 | void tcp_done(struct sock *sk) | 2755 | void tcp_done(struct sock *sk) |
2652 | { | 2756 | { |
2653 | if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) | 2757 | if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) |
2654 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); | 2758 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); |
2655 | 2759 | ||
2656 | tcp_set_state(sk, TCP_CLOSE); | 2760 | tcp_set_state(sk, TCP_CLOSE); |
@@ -2685,6 +2789,8 @@ void __init tcp_init(void) | |||
2685 | 2789 | ||
2686 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); | 2790 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); |
2687 | 2791 | ||
2792 | percpu_counter_init(&tcp_sockets_allocated, 0); | ||
2793 | percpu_counter_init(&tcp_orphan_count, 0); | ||
2688 | tcp_hashinfo.bind_bucket_cachep = | 2794 | tcp_hashinfo.bind_bucket_cachep = |
2689 | kmem_cache_create("tcp_bind_bucket", | 2795 | kmem_cache_create("tcp_bind_bucket", |
2690 | sizeof(struct inet_bind_bucket), 0, | 2796 | sizeof(struct inet_bind_bucket), 0, |
@@ -2707,8 +2813,8 @@ void __init tcp_init(void) | |||
2707 | thash_entries ? 0 : 512 * 1024); | 2813 | thash_entries ? 0 : 512 * 1024); |
2708 | tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; | 2814 | tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; |
2709 | for (i = 0; i < tcp_hashinfo.ehash_size; i++) { | 2815 | for (i = 0; i < tcp_hashinfo.ehash_size; i++) { |
2710 | INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); | 2816 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); |
2711 | INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); | 2817 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); |
2712 | } | 2818 | } |
2713 | if (inet_ehash_locks_alloc(&tcp_hashinfo)) | 2819 | if (inet_ehash_locks_alloc(&tcp_hashinfo)) |
2714 | panic("TCP: failed to alloc ehash_locks"); | 2820 | panic("TCP: failed to alloc ehash_locks"); |
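Switching the established-hash buckets to nulls lists stores each slot's index as the value encoded in its chain terminator. That is what makes lockless (RCU) lookups safe: a reader whose traversal raced with a socket being rehashed can detect that it ended on a foreign chain's nulls marker and restart. A minimal sketch of the primitives involved:

    #include <linux/list_nulls.h>

    #define DEMO_BUCKETS 4

    static struct hlist_nulls_head demo_hash[DEMO_BUCKETS];

    static void demo_hash_init(void)
    {
            int i;

            /* Encode the slot index in each terminator, as tcp_init()
             * now does for the ehash chain and twchain heads. */
            for (i = 0; i < DEMO_BUCKETS; i++)
                    INIT_HLIST_NULLS_HEAD(&demo_hash[i], i);
    }

    static int demo_ended_in_slot(const struct hlist_nulls_node *node, int slot)
    {
            /* A lockless reader verifies, once is_a_nulls() is true,
             * that the traversal finished in the chain it started in. */
            return is_a_nulls(node) && get_nulls_value(node) == slot;
    }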
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 4a1221e5e8ee..ee467ec40c4f 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -1,13 +1,23 @@ | |||
1 | /* | 1 | /* |
2 | * TCP CUBIC: Binary Increase Congestion control for TCP v2.2 | 2 | * TCP CUBIC: Binary Increase Congestion control for TCP v2.3 |
3 | * Home page: | 3 | * Home page: |
4 | * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC | 4 | * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC |
5 | * This is from the implementation of CUBIC TCP in | 5 | * This is from the implementation of CUBIC TCP in |
6 | * Injong Rhee, Lisong Xu. | 6 | * Sangtae Ha, Injong Rhee and Lisong Xu, |
7 | * "CUBIC: A New TCP-Friendly High-Speed TCP Variant | 7 | * "CUBIC: A New TCP-Friendly High-Speed TCP Variant" |
8 | * in PFLDnet 2005 | 8 | * in ACM SIGOPS Operating System Review, July 2008. |
9 | * Available from: | 9 | * Available from: |
10 | * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf | 10 | * http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf |
11 | * | ||
12 | * CUBIC integrates a new slow start algorithm, called HyStart. | ||
13 | * The details of HyStart are presented in | ||
14 | * Sangtae Ha and Injong Rhee, | ||
15 | * "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008. | ||
16 | * Available from: | ||
17 | * http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf | ||
18 | * | ||
19 | * All testing results are available from: | ||
20 | * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing | ||
11 | * | 21 | * |
12 | * Unless CUBIC is enabled and congestion window is large | 22 | * Unless CUBIC is enabled and congestion window is large |
13 | * this behaves the same as the original Reno. | 23 | * this behaves the same as the original Reno. |
@@ -23,12 +33,26 @@ | |||
23 | */ | 33 | */ |
24 | #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ | 34 | #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ |
25 | 35 | ||
36 | /* Two methods of hybrid slow start */ | ||
37 | #define HYSTART_ACK_TRAIN 0x1 | ||
38 | #define HYSTART_DELAY 0x2 | ||
39 | |||
40 | /* Number of delay samples for detecting the increase of delay */ | ||
41 | #define HYSTART_MIN_SAMPLES 8 | ||
42 | #define HYSTART_DELAY_MIN (2U<<3) | ||
43 | #define HYSTART_DELAY_MAX (16U<<3) | ||
44 | #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) | ||
45 | |||
26 | static int fast_convergence __read_mostly = 1; | 46 | static int fast_convergence __read_mostly = 1; |
27 | static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ | 47 | static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ |
28 | static int initial_ssthresh __read_mostly; | 48 | static int initial_ssthresh __read_mostly; |
29 | static int bic_scale __read_mostly = 41; | 49 | static int bic_scale __read_mostly = 41; |
30 | static int tcp_friendliness __read_mostly = 1; | 50 | static int tcp_friendliness __read_mostly = 1; |
31 | 51 | ||
52 | static int hystart __read_mostly = 1; | ||
53 | static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; | ||
54 | static int hystart_low_window __read_mostly = 16; | ||
55 | |||
32 | static u32 cube_rtt_scale __read_mostly; | 56 | static u32 cube_rtt_scale __read_mostly; |
33 | static u32 beta_scale __read_mostly; | 57 | static u32 beta_scale __read_mostly; |
34 | static u64 cube_factor __read_mostly; | 58 | static u64 cube_factor __read_mostly; |
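HYSTART_DELAY_THRESH() bounds the slow-start exit threshold to [HYSTART_DELAY_MIN, HYSTART_DELAY_MAX], i.e. [16, 128] once the shifts are evaluated. A worked sketch of the clamp() arithmetic; clamp() itself comes from linux/kernel.h:

    #include <linux/kernel.h>
    #include <linux/types.h>

    static u32 demo_thresh(u32 x)
    {
            /* (2U << 3) == 16, (16U << 3) == 128 */
            return clamp(x, 2U << 3, 16U << 3);
    }

    /* demo_thresh(4)   == 16   (raised to the floor)
     * demo_thresh(50)  == 50   (in range, passed through)
     * demo_thresh(500) == 128  (capped at the ceiling)
     */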
@@ -44,6 +68,13 @@ module_param(bic_scale, int, 0444); | |||
44 | MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); | 68 | MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); |
45 | module_param(tcp_friendliness, int, 0644); | 69 | module_param(tcp_friendliness, int, 0644); |
46 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); | 70 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); |
71 | module_param(hystart, int, 0644); | ||
72 | MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm"); | ||
73 | module_param(hystart_detect, int, 0644); | ||
74 | MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms" ||
75 | " 1: packet-train 2: delay 3: both packet-train and delay"); | ||
76 | module_param(hystart_low_window, int, 0644); | ||
77 | MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); | ||
47 | 78 | ||
48 | /* BIC TCP Parameters */ | 79 | /* BIC TCP Parameters */ |
49 | struct bictcp { | 80 | struct bictcp { |
@@ -59,7 +90,13 @@ struct bictcp { | |||
59 | u32 ack_cnt; /* number of acks */ | 90 | u32 ack_cnt; /* number of acks */ |
60 | u32 tcp_cwnd; /* estimated tcp cwnd */ | 91 | u32 tcp_cwnd; /* estimated tcp cwnd */ |
61 | #define ACK_RATIO_SHIFT 4 | 92 | #define ACK_RATIO_SHIFT 4 |
62 | u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ | 93 | u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ |
94 | u8 sample_cnt; /* number of samples to decide curr_rtt */ | ||
95 | u8 found; /* the exit point is found? */ | ||
96 | u32 round_start; /* beginning of each round */ | ||
97 | u32 end_seq; /* end_seq of the round */ | ||
98 | u32 last_jiffies; /* last time when the ACK spacing is close */ | ||
99 | u32 curr_rtt; /* the minimum rtt of current round */ | ||
63 | }; | 100 | }; |
64 | 101 | ||
65 | static inline void bictcp_reset(struct bictcp *ca) | 102 | static inline void bictcp_reset(struct bictcp *ca) |
@@ -76,12 +113,28 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
76 | ca->delayed_ack = 2 << ACK_RATIO_SHIFT; | 113 | ca->delayed_ack = 2 << ACK_RATIO_SHIFT; |
77 | ca->ack_cnt = 0; | 114 | ca->ack_cnt = 0; |
78 | ca->tcp_cwnd = 0; | 115 | ca->tcp_cwnd = 0; |
116 | ca->found = 0; | ||
117 | } | ||
118 | |||
119 | static inline void bictcp_hystart_reset(struct sock *sk) | ||
120 | { | ||
121 | struct tcp_sock *tp = tcp_sk(sk); | ||
122 | struct bictcp *ca = inet_csk_ca(sk); | ||
123 | |||
124 | ca->round_start = ca->last_jiffies = jiffies; | ||
125 | ca->end_seq = tp->snd_nxt; | ||
126 | ca->curr_rtt = 0; | ||
127 | ca->sample_cnt = 0; | ||
79 | } | 128 | } |
80 | 129 | ||
81 | static void bictcp_init(struct sock *sk) | 130 | static void bictcp_init(struct sock *sk) |
82 | { | 131 | { |
83 | bictcp_reset(inet_csk_ca(sk)); | 132 | bictcp_reset(inet_csk_ca(sk)); |
84 | if (initial_ssthresh) | 133 | |
134 | if (hystart) | ||
135 | bictcp_hystart_reset(sk); | ||
136 | |||
137 | if (!hystart && initial_ssthresh) | ||
85 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; | 138 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; |
86 | } | 139 | } |
87 | 140 | ||
@@ -235,9 +288,11 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
235 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 288 | if (!tcp_is_cwnd_limited(sk, in_flight)) |
236 | return; | 289 | return; |
237 | 290 | ||
238 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 291 | if (tp->snd_cwnd <= tp->snd_ssthresh) { |
292 | if (hystart && after(ack, ca->end_seq)) | ||
293 | bictcp_hystart_reset(sk); | ||
239 | tcp_slow_start(tp); | 294 | tcp_slow_start(tp); |
240 | else { | 295 | } else { |
241 | bictcp_update(ca, tp->snd_cwnd); | 296 | bictcp_update(ca, tp->snd_cwnd); |
242 | 297 | ||
243 | /* In dangerous area, increase slowly. | 298 | /* In dangerous area, increase slowly. |
@@ -281,8 +336,45 @@ static u32 bictcp_undo_cwnd(struct sock *sk) | |||
281 | 336 | ||
282 | static void bictcp_state(struct sock *sk, u8 new_state) | 337 | static void bictcp_state(struct sock *sk, u8 new_state) |
283 | { | 338 | { |
284 | if (new_state == TCP_CA_Loss) | 339 | if (new_state == TCP_CA_Loss) { |
285 | bictcp_reset(inet_csk_ca(sk)); | 340 | bictcp_reset(inet_csk_ca(sk)); |
341 | bictcp_hystart_reset(sk); | ||
342 | } | ||
343 | } | ||
344 | |||
345 | static void hystart_update(struct sock *sk, u32 delay) | ||
346 | { | ||
347 | struct tcp_sock *tp = tcp_sk(sk); | ||
348 | struct bictcp *ca = inet_csk_ca(sk); | ||
349 | |||
350 | if (!(ca->found & hystart_detect)) { | ||
351 | u32 curr_jiffies = jiffies; | ||
352 | |||
353 | /* first detection parameter - ack-train detection */ | ||
354 | if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { | ||
355 | ca->last_jiffies = curr_jiffies; | ||
356 | if (curr_jiffies - ca->round_start >= ca->delay_min>>4) | ||
357 | ca->found |= HYSTART_ACK_TRAIN; | ||
358 | } | ||
359 | |||
360 | /* obtain the minimum delay over the first HYSTART_MIN_SAMPLES samples */ | ||
361 | if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { | ||
362 | if (ca->curr_rtt == 0 || ca->curr_rtt > delay) | ||
363 | ca->curr_rtt = delay; | ||
364 | |||
365 | ca->sample_cnt++; | ||
366 | } else { | ||
367 | if (ca->curr_rtt > ca->delay_min + | ||
368 | HYSTART_DELAY_THRESH(ca->delay_min>>4)) | ||
369 | ca->found |= HYSTART_DELAY; | ||
370 | } | ||
371 | /* | ||
372 | * When either of the two conditions is met, | ||
373 | * we exit slow start immediately. | ||
374 | */ | ||
375 | if (ca->found & hystart_detect) | ||
376 | tp->snd_ssthresh = tp->snd_cwnd; | ||
377 | } | ||
286 | } | 378 | } |
287 | 379 | ||
288 | /* Track delayed acknowledgment ratio using sliding window | 380 | /* Track delayed acknowledgment ratio using sliding window |
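A compact restatement of the two exit tests in hystart_update() may help; it assumes HZ=1000 and the (jiffies << 3) RTT scaling used by this version, with wall-clock arguments in plain jiffies. With delay_min at 100 ms, the ack-train test fires once ACKs spaced at most 2 ms apart have spanned delay_min>>4 = 50 jiffies (half the minimum RTT) since round_start; the delay test fires once the round's minimum RTT exceeds delay_min by the clamped margin:

/* Hedged restatement of the HyStart exit conditions: now, round_start
 * and last_ack are in jiffies; curr_rtt and delay_min in (jiffies << 3).
 */
static int hystart_should_exit(unsigned long now, unsigned long round_start,
			       unsigned long last_ack, unsigned int curr_rtt,
			       unsigned int delay_min)
{
	/* 1. Ack train: closely spaced ACKs (<= 2 ms apart) have spanned
	 *    at least delay_min>>4 jiffies since the round started.
	 */
	int train = (now - last_ack <= 2) &&	/* msecs_to_jiffies(2) at HZ=1000 */
		    (now - round_start >= (delay_min >> 4));

	/* 2. Delay increase: the round's min RTT grew past delay_min by
	 *    the margin computed as in HYSTART_DELAY_THRESH() above.
	 */
	unsigned int margin = delay_min >> 4;

	if (margin < (2u << 3))
		margin = 2u << 3;
	else if (margin > (16u << 3))
		margin = 16u << 3;

	return train || (curr_rtt > delay_min + margin);
}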
@@ -291,6 +383,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) | |||
291 | static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) | 383 | static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) |
292 | { | 384 | { |
293 | const struct inet_connection_sock *icsk = inet_csk(sk); | 385 | const struct inet_connection_sock *icsk = inet_csk(sk); |
386 | const struct tcp_sock *tp = tcp_sk(sk); | ||
294 | struct bictcp *ca = inet_csk_ca(sk); | 387 | struct bictcp *ca = inet_csk_ca(sk); |
295 | u32 delay; | 388 | u32 delay; |
296 | 389 | ||
@@ -314,6 +407,11 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) | |||
314 | /* first time call or link delay decreases */ | 407 | /* first time call or link delay decreases */ |
315 | if (ca->delay_min == 0 || ca->delay_min > delay) | 408 | if (ca->delay_min == 0 || ca->delay_min > delay) |
316 | ca->delay_min = delay; | 409 | ca->delay_min = delay; |
410 | |||
411 | /* hystart runs only in slow start, once cwnd reaches hystart_low_window */ | ||
412 | if (hystart && tp->snd_cwnd <= tp->snd_ssthresh && | ||
413 | tp->snd_cwnd >= hystart_low_window) | ||
414 | hystart_update(sk, delay); | ||
317 | } | 415 | } |
318 | 416 | ||
319 | static struct tcp_congestion_ops cubictcp = { | 417 | static struct tcp_congestion_ops cubictcp = { |
@@ -372,4 +470,4 @@ module_exit(cubictcp_unregister); | |||
372 | MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); | 470 | MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); |
373 | MODULE_LICENSE("GPL"); | 471 | MODULE_LICENSE("GPL"); |
374 | MODULE_DESCRIPTION("CUBIC TCP"); | 472 | MODULE_DESCRIPTION("CUBIC TCP"); |
375 | MODULE_VERSION("2.2"); | 473 | MODULE_VERSION("2.3"); |
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 838d491dfda7..fcbcd4ff6c5f 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c | |||
@@ -34,7 +34,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, | |||
34 | tcp_get_info(sk, info); | 34 | tcp_get_info(sk, info); |
35 | } | 35 | } |
36 | 36 | ||
37 | static struct inet_diag_handler tcp_diag_handler = { | 37 | static const struct inet_diag_handler tcp_diag_handler = { |
38 | .idiag_hashinfo = &tcp_hashinfo, | 38 | .idiag_hashinfo = &tcp_hashinfo, |
39 | .idiag_get_info = tcp_diag_get_info, | 39 | .idiag_get_info = tcp_diag_get_info, |
40 | .idiag_type = TCPDIAG_GETSOCK, | 40 | .idiag_type = TCPDIAG_GETSOCK, |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d77c0d29e239..99b7ecbe8893 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -701,13 +701,10 @@ static inline void tcp_set_rto(struct sock *sk) | |||
701 | * all the algo is pure shit and should be replaced | 701 | * all the algo is pure shit and should be replaced |
702 | * with a correct one, which is exactly what we pretend to do. | 702 | * with a correct one, which is exactly what we pretend to do. |
703 | */ | 703 | */ |
704 | } | ||
705 | 704 | ||
706 | /* NOTE: clamping at TCP_RTO_MIN is not required, current algo | 705 | /* NOTE: clamping at TCP_RTO_MIN is not required, current algo |
707 | * guarantees that rto is higher. | 706 | * guarantees that rto is higher. |
708 | */ | 707 | */ |
709 | static inline void tcp_bound_rto(struct sock *sk) | ||
710 | { | ||
711 | if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) | 708 | if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) |
712 | inet_csk(sk)->icsk_rto = TCP_RTO_MAX; | 709 | inet_csk(sk)->icsk_rto = TCP_RTO_MAX; |
713 | } | 710 | } |
@@ -928,7 +925,6 @@ static void tcp_init_metrics(struct sock *sk) | |||
928 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); | 925 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); |
929 | } | 926 | } |
930 | tcp_set_rto(sk); | 927 | tcp_set_rto(sk); |
931 | tcp_bound_rto(sk); | ||
932 | if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) | 928 | if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) |
933 | goto reset; | 929 | goto reset; |
934 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | 930 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); |
@@ -1002,7 +998,8 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) | |||
1002 | } | 998 | } |
1003 | } | 999 | } |
1004 | 1000 | ||
1005 | void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) | 1001 | static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, |
1002 | struct sk_buff *skb) | ||
1006 | { | 1003 | { |
1007 | tcp_verify_retransmit_hint(tp, skb); | 1004 | tcp_verify_retransmit_hint(tp, skb); |
1008 | 1005 | ||
@@ -1236,31 +1233,58 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, | |||
1236 | return dup_sack; | 1233 | return dup_sack; |
1237 | } | 1234 | } |
1238 | 1235 | ||
1236 | struct tcp_sacktag_state { | ||
1237 | int reord; | ||
1238 | int fack_count; | ||
1239 | int flag; | ||
1240 | }; | ||
1241 | |||
1239 | /* Check if skb is fully within the SACK block. In presence of GSO skbs, | 1242 | /* Check if skb is fully within the SACK block. In presence of GSO skbs, |
1240 | * the incoming SACK may not exactly match but we can find smaller MSS | 1243 | * the incoming SACK may not exactly match but we can find smaller MSS |
1241 | * aligned portion of it that matches. Therefore we might need to fragment | 1244 | * aligned portion of it that matches. Therefore we might need to fragment |
1242 | * which may fail and create some hassle (caller must handle error case | 1245 | * which may fail and create some hassle (caller must handle error case |
1243 | * returns). | 1246 | * returns). |
1247 | * | ||
1248 | * FIXME: this could be merged to shift decision code | ||
1244 | */ | 1249 | */ |
1245 | static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | 1250 | static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, |
1246 | u32 start_seq, u32 end_seq) | 1251 | u32 start_seq, u32 end_seq) |
1247 | { | 1252 | { |
1248 | int in_sack, err; | 1253 | int in_sack, err; |
1249 | unsigned int pkt_len; | 1254 | unsigned int pkt_len; |
1255 | unsigned int mss; | ||
1250 | 1256 | ||
1251 | in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && | 1257 | in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && |
1252 | !before(end_seq, TCP_SKB_CB(skb)->end_seq); | 1258 | !before(end_seq, TCP_SKB_CB(skb)->end_seq); |
1253 | 1259 | ||
1254 | if (tcp_skb_pcount(skb) > 1 && !in_sack && | 1260 | if (tcp_skb_pcount(skb) > 1 && !in_sack && |
1255 | after(TCP_SKB_CB(skb)->end_seq, start_seq)) { | 1261 | after(TCP_SKB_CB(skb)->end_seq, start_seq)) { |
1256 | 1262 | mss = tcp_skb_mss(skb); | |
1257 | in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); | 1263 | in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); |
1258 | 1264 | ||
1259 | if (!in_sack) | 1265 | if (!in_sack) { |
1260 | pkt_len = start_seq - TCP_SKB_CB(skb)->seq; | 1266 | pkt_len = start_seq - TCP_SKB_CB(skb)->seq; |
1261 | else | 1267 | if (pkt_len < mss) |
1268 | pkt_len = mss; | ||
1269 | } else { | ||
1262 | pkt_len = end_seq - TCP_SKB_CB(skb)->seq; | 1270 | pkt_len = end_seq - TCP_SKB_CB(skb)->seq; |
1263 | err = tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size); | 1271 | if (pkt_len < mss) |
1272 | return -EINVAL; | ||
1273 | } | ||
1274 | |||
1275 | /* Round if necessary so that SACKs cover only full MSSes | ||
1276 | * and/or the remaining small portion (if present) | ||
1277 | */ | ||
1278 | if (pkt_len > mss) { | ||
1279 | unsigned int new_len = (pkt_len / mss) * mss; | ||
1280 | if (!in_sack && new_len < pkt_len) { | ||
1281 | new_len += mss; | ||
1282 | if (new_len > skb->len) | ||
1283 | return 0; | ||
1284 | } | ||
1285 | pkt_len = new_len; | ||
1286 | } | ||
1287 | err = tcp_fragment(sk, skb, pkt_len, mss); | ||
1264 | if (err < 0) | 1288 | if (err < 0) |
1265 | return err; | 1289 | return err; |
1266 | } | 1290 | } |
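The rounding added to tcp_match_skb_to_sack() above keeps partial-SACK splits on MSS boundaries: round up when the skb's head lies outside the SACK block (refusing the split if that would swallow the whole skb), round down otherwise. A hypothetical standalone restatement, not kernel code:

/* pkt_len is the prospective split point inside an skb of skb_len
 * bytes; in_sack says whether the skb head is inside the SACK block.
 * Returns the MSS-aligned split length, or -1 when splitting should
 * be refused.
 */
static int round_split_to_mss(unsigned int pkt_len, unsigned int mss,
			      unsigned int skb_len, int in_sack)
{
	if (pkt_len < mss)
		return in_sack ? -1 : (int)mss;	/* mirrors the early cases */

	if (pkt_len > mss) {
		unsigned int new_len = (pkt_len / mss) * mss;

		if (!in_sack && new_len < pkt_len) {
			new_len += mss;		/* round up to next boundary */
			if (new_len > skb_len)
				return -1;	/* would cover the whole skb */
		}
		pkt_len = new_len;		/* otherwise round down */
	}
	return (int)pkt_len;
}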
@@ -1268,24 +1292,25 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | |||
1268 | return in_sack; | 1292 | return in_sack; |
1269 | } | 1293 | } |
1270 | 1294 | ||
1271 | static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | 1295 | static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, |
1272 | int *reord, int dup_sack, int fack_count) | 1296 | struct tcp_sacktag_state *state, |
1297 | int dup_sack, int pcount) | ||
1273 | { | 1298 | { |
1274 | struct tcp_sock *tp = tcp_sk(sk); | 1299 | struct tcp_sock *tp = tcp_sk(sk); |
1275 | u8 sacked = TCP_SKB_CB(skb)->sacked; | 1300 | u8 sacked = TCP_SKB_CB(skb)->sacked; |
1276 | int flag = 0; | 1301 | int fack_count = state->fack_count; |
1277 | 1302 | ||
1278 | /* Account D-SACK for retransmitted packet. */ | 1303 | /* Account D-SACK for retransmitted packet. */ |
1279 | if (dup_sack && (sacked & TCPCB_RETRANS)) { | 1304 | if (dup_sack && (sacked & TCPCB_RETRANS)) { |
1280 | if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) | 1305 | if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) |
1281 | tp->undo_retrans--; | 1306 | tp->undo_retrans--; |
1282 | if (sacked & TCPCB_SACKED_ACKED) | 1307 | if (sacked & TCPCB_SACKED_ACKED) |
1283 | *reord = min(fack_count, *reord); | 1308 | state->reord = min(fack_count, state->reord); |
1284 | } | 1309 | } |
1285 | 1310 | ||
1286 | /* Nothing to do; acked frame is about to be dropped (was ACKed). */ | 1311 | /* Nothing to do; acked frame is about to be dropped (was ACKed). */ |
1287 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) | 1312 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) |
1288 | return flag; | 1313 | return sacked; |
1289 | 1314 | ||
1290 | if (!(sacked & TCPCB_SACKED_ACKED)) { | 1315 | if (!(sacked & TCPCB_SACKED_ACKED)) { |
1291 | if (sacked & TCPCB_SACKED_RETRANS) { | 1316 | if (sacked & TCPCB_SACKED_RETRANS) { |
@@ -1294,10 +1319,9 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1294 | * that retransmission is still in flight. | 1319 | * that retransmission is still in flight. |
1295 | */ | 1320 | */ |
1296 | if (sacked & TCPCB_LOST) { | 1321 | if (sacked & TCPCB_LOST) { |
1297 | TCP_SKB_CB(skb)->sacked &= | 1322 | sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); |
1298 | ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); | 1323 | tp->lost_out -= pcount; |
1299 | tp->lost_out -= tcp_skb_pcount(skb); | 1324 | tp->retrans_out -= pcount; |
1300 | tp->retrans_out -= tcp_skb_pcount(skb); | ||
1301 | } | 1325 | } |
1302 | } else { | 1326 | } else { |
1303 | if (!(sacked & TCPCB_RETRANS)) { | 1327 | if (!(sacked & TCPCB_RETRANS)) { |
@@ -1306,56 +1330,280 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1306 | */ | 1330 | */ |
1307 | if (before(TCP_SKB_CB(skb)->seq, | 1331 | if (before(TCP_SKB_CB(skb)->seq, |
1308 | tcp_highest_sack_seq(tp))) | 1332 | tcp_highest_sack_seq(tp))) |
1309 | *reord = min(fack_count, *reord); | 1333 | state->reord = min(fack_count, |
1334 | state->reord); | ||
1310 | 1335 | ||
1311 | /* SACK enhanced F-RTO (RFC4138; Appendix B) */ | 1336 | /* SACK enhanced F-RTO (RFC4138; Appendix B) */ |
1312 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) | 1337 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) |
1313 | flag |= FLAG_ONLY_ORIG_SACKED; | 1338 | state->flag |= FLAG_ONLY_ORIG_SACKED; |
1314 | } | 1339 | } |
1315 | 1340 | ||
1316 | if (sacked & TCPCB_LOST) { | 1341 | if (sacked & TCPCB_LOST) { |
1317 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1342 | sacked &= ~TCPCB_LOST; |
1318 | tp->lost_out -= tcp_skb_pcount(skb); | 1343 | tp->lost_out -= pcount; |
1319 | } | 1344 | } |
1320 | } | 1345 | } |
1321 | 1346 | ||
1322 | TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; | 1347 | sacked |= TCPCB_SACKED_ACKED; |
1323 | flag |= FLAG_DATA_SACKED; | 1348 | state->flag |= FLAG_DATA_SACKED; |
1324 | tp->sacked_out += tcp_skb_pcount(skb); | 1349 | tp->sacked_out += pcount; |
1325 | 1350 | ||
1326 | fack_count += tcp_skb_pcount(skb); | 1351 | fack_count += pcount; |
1327 | 1352 | ||
1328 | /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ | 1353 | /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ |
1329 | if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && | 1354 | if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && |
1330 | before(TCP_SKB_CB(skb)->seq, | 1355 | before(TCP_SKB_CB(skb)->seq, |
1331 | TCP_SKB_CB(tp->lost_skb_hint)->seq)) | 1356 | TCP_SKB_CB(tp->lost_skb_hint)->seq)) |
1332 | tp->lost_cnt_hint += tcp_skb_pcount(skb); | 1357 | tp->lost_cnt_hint += pcount; |
1333 | 1358 | ||
1334 | if (fack_count > tp->fackets_out) | 1359 | if (fack_count > tp->fackets_out) |
1335 | tp->fackets_out = fack_count; | 1360 | tp->fackets_out = fack_count; |
1336 | |||
1337 | if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) | ||
1338 | tcp_advance_highest_sack(sk, skb); | ||
1339 | } | 1361 | } |
1340 | 1362 | ||
1341 | /* D-SACK. We can detect redundant retransmission in S|R and plain R | 1363 | /* D-SACK. We can detect redundant retransmission in S|R and plain R |
1342 | * frames and clear it. undo_retrans is decreased above, L|R frames | 1364 | * frames and clear it. undo_retrans is decreased above, L|R frames |
1343 | * are accounted above as well. | 1365 | * are accounted above as well. |
1344 | */ | 1366 | */ |
1345 | if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { | 1367 | if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) { |
1346 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1368 | sacked &= ~TCPCB_SACKED_RETRANS; |
1347 | tp->retrans_out -= tcp_skb_pcount(skb); | 1369 | tp->retrans_out -= pcount; |
1348 | } | 1370 | } |
1349 | 1371 | ||
1350 | return flag; | 1372 | return sacked; |
1373 | } | ||
1374 | |||
1375 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | ||
1376 | struct tcp_sacktag_state *state, | ||
1377 | unsigned int pcount, int shifted, int mss) | ||
1378 | { | ||
1379 | struct tcp_sock *tp = tcp_sk(sk); | ||
1380 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); | ||
1381 | |||
1382 | BUG_ON(!pcount); | ||
1383 | |||
1384 | /* Tweak before seqno plays */ | ||
1385 | if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint && | ||
1386 | !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) | ||
1387 | tp->lost_cnt_hint += pcount; | ||
1388 | |||
1389 | TCP_SKB_CB(prev)->end_seq += shifted; | ||
1390 | TCP_SKB_CB(skb)->seq += shifted; | ||
1391 | |||
1392 | skb_shinfo(prev)->gso_segs += pcount; | ||
1393 | BUG_ON(skb_shinfo(skb)->gso_segs < pcount); | ||
1394 | skb_shinfo(skb)->gso_segs -= pcount; | ||
1395 | |||
1396 | /* When we're adding to gso_segs == 1, gso_size will be zero, | ||
1397 | * in theory this shouldn't be necessary, but as long as DSACK | ||
1398 | * code can come after this skb later on, it's better to keep | ||
1399 | * gso_size set to something. | ||
1400 | */ | ||
1401 | if (!skb_shinfo(prev)->gso_size) { | ||
1402 | skb_shinfo(prev)->gso_size = mss; | ||
1403 | skb_shinfo(prev)->gso_type = sk->sk_gso_type; | ||
1404 | } | ||
1405 | |||
1406 | /* CHECKME: To clear or not to clear? Mimics normal skb currently */ | ||
1407 | if (skb_shinfo(skb)->gso_segs <= 1) { | ||
1408 | skb_shinfo(skb)->gso_size = 0; | ||
1409 | skb_shinfo(skb)->gso_type = 0; | ||
1410 | } | ||
1411 | |||
1412 | /* We discard results */ | ||
1413 | tcp_sacktag_one(skb, sk, state, 0, pcount); | ||
1414 | |||
1415 | /* Difference in this won't matter, both ACKed by the same cumul. ACK */ | ||
1416 | TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); | ||
1417 | |||
1418 | if (skb->len > 0) { | ||
1419 | BUG_ON(!tcp_skb_pcount(skb)); | ||
1420 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); | ||
1421 | return 0; | ||
1422 | } | ||
1423 | |||
1424 | /* Whole SKB was eaten :-) */ | ||
1425 | |||
1426 | if (skb == tp->retransmit_skb_hint) | ||
1427 | tp->retransmit_skb_hint = prev; | ||
1428 | if (skb == tp->scoreboard_skb_hint) | ||
1429 | tp->scoreboard_skb_hint = prev; | ||
1430 | if (skb == tp->lost_skb_hint) { | ||
1431 | tp->lost_skb_hint = prev; | ||
1432 | tp->lost_cnt_hint -= tcp_skb_pcount(prev); | ||
1433 | } | ||
1434 | |||
1435 | TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; | ||
1436 | if (skb == tcp_highest_sack(sk)) | ||
1437 | tcp_advance_highest_sack(sk, skb); | ||
1438 | |||
1439 | tcp_unlink_write_queue(skb, sk); | ||
1440 | sk_wmem_free_skb(sk, skb); | ||
1441 | |||
1442 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); | ||
1443 | |||
1444 | return 1; | ||
1445 | } | ||
1446 | |||
1447 | /* I wish gso_size had a bit saner initialization than | ||
1448 | * something-or-zero, which complicates things. | ||
1449 | */ | ||
1450 | static int tcp_skb_seglen(struct sk_buff *skb) | ||
1451 | { | ||
1452 | return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb); | ||
1453 | } | ||
1454 | |||
1455 | /* Shifting pages past head area doesn't work */ | ||
1456 | static int skb_can_shift(struct sk_buff *skb) | ||
1457 | { | ||
1458 | return !skb_headlen(skb) && skb_is_nonlinear(skb); | ||
1459 | } | ||
1460 | |||
1461 | /* Try collapsing SACK blocks spanning across multiple skbs to a single | ||
1462 | * skb. | ||
1463 | */ | ||
1464 | static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, | ||
1465 | struct tcp_sacktag_state *state, | ||
1466 | u32 start_seq, u32 end_seq, | ||
1467 | int dup_sack) | ||
1468 | { | ||
1469 | struct tcp_sock *tp = tcp_sk(sk); | ||
1470 | struct sk_buff *prev; | ||
1471 | int mss; | ||
1472 | int pcount = 0; | ||
1473 | int len; | ||
1474 | int in_sack; | ||
1475 | |||
1476 | if (!sk_can_gso(sk)) | ||
1477 | goto fallback; | ||
1478 | |||
1479 | /* Normally R but no L won't result in plain S */ | ||
1480 | if (!dup_sack && | ||
1481 | (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS) | ||
1482 | goto fallback; | ||
1483 | if (!skb_can_shift(skb)) | ||
1484 | goto fallback; | ||
1485 | /* This frame is about to be dropped (was ACKed). */ | ||
1486 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) | ||
1487 | goto fallback; | ||
1488 | |||
1489 | /* Can only happen with delayed DSACK + discard craziness */ | ||
1490 | if (unlikely(skb == tcp_write_queue_head(sk))) | ||
1491 | goto fallback; | ||
1492 | prev = tcp_write_queue_prev(sk, skb); | ||
1493 | |||
1494 | if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) | ||
1495 | goto fallback; | ||
1496 | |||
1497 | in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && | ||
1498 | !before(end_seq, TCP_SKB_CB(skb)->end_seq); | ||
1499 | |||
1500 | if (in_sack) { | ||
1501 | len = skb->len; | ||
1502 | pcount = tcp_skb_pcount(skb); | ||
1503 | mss = tcp_skb_seglen(skb); | ||
1504 | |||
1505 | /* TODO: Fix DSACKs to not fragment already SACKed and we can | ||
1506 | * drop this restriction as unnecessary | ||
1507 | */ | ||
1508 | if (mss != tcp_skb_seglen(prev)) | ||
1509 | goto fallback; | ||
1510 | } else { | ||
1511 | if (!after(TCP_SKB_CB(skb)->end_seq, start_seq)) | ||
1512 | goto noop; | ||
1513 | /* CHECKME: This is the non-MSS split case only? Note this will | ||
1514 | * cause skipped skbs due to the advancing loop; the original | ||
1515 | * has that behavior too | ||
1516 | */ | ||
1517 | if (tcp_skb_pcount(skb) <= 1) | ||
1518 | goto noop; | ||
1519 | |||
1520 | in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); | ||
1521 | if (!in_sack) { | ||
1522 | /* TODO: head merge to next could be attempted here | ||
1523 | * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)), | ||
1524 | * though it might not be worth of the additional hassle | ||
1525 | * | ||
1526 | * ...we can probably just fallback to what was done | ||
1527 | * previously. We could try merging non-SACKed ones | ||
1528 | * as well but it probably isn't going to buy off | ||
1529 | * because later SACKs might again split them, and | ||
1530 | * it would make skb timestamp tracking considerably | ||
1531 | * harder problem. | ||
1532 | */ | ||
1533 | goto fallback; | ||
1534 | } | ||
1535 | |||
1536 | len = end_seq - TCP_SKB_CB(skb)->seq; | ||
1537 | BUG_ON(len < 0); | ||
1538 | BUG_ON(len > skb->len); | ||
1539 | |||
1540 | /* MSS boundaries should be honoured or else pcount will | ||
1541 | * severely break even though it makes things a bit trickier. | ||
1542 | * Optimize the common case to avoid most of the divides. | ||
1543 | */ | ||
1544 | mss = tcp_skb_mss(skb); | ||
1545 | |||
1546 | /* TODO: Fix DSACKs to not fragment already SACKed and we can | ||
1547 | * drop this restriction as unnecessary | ||
1548 | */ | ||
1549 | if (mss != tcp_skb_seglen(prev)) | ||
1550 | goto fallback; | ||
1551 | |||
1552 | if (len == mss) { | ||
1553 | pcount = 1; | ||
1554 | } else if (len < mss) { | ||
1555 | goto noop; | ||
1556 | } else { | ||
1557 | pcount = len / mss; | ||
1558 | len = pcount * mss; | ||
1559 | } | ||
1560 | } | ||
1561 | |||
1562 | if (!skb_shift(prev, skb, len)) | ||
1563 | goto fallback; | ||
1564 | if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss)) | ||
1565 | goto out; | ||
1566 | |||
1567 | /* Filling the hole allows collapsing with the next skb as well; this | ||
1568 | * is very useful when a hole-on-every-nth-skb pattern happens | ||
1569 | */ | ||
1570 | if (prev == tcp_write_queue_tail(sk)) | ||
1571 | goto out; | ||
1572 | skb = tcp_write_queue_next(sk, prev); | ||
1573 | |||
1574 | if (!skb_can_shift(skb) || | ||
1575 | (skb == tcp_send_head(sk)) || | ||
1576 | ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) || | ||
1577 | (mss != tcp_skb_seglen(skb))) | ||
1578 | goto out; | ||
1579 | |||
1580 | len = skb->len; | ||
1581 | if (skb_shift(prev, skb, len)) { | ||
1582 | pcount += tcp_skb_pcount(skb); | ||
1583 | tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss); | ||
1584 | } | ||
1585 | |||
1586 | out: | ||
1587 | state->fack_count += pcount; | ||
1588 | return prev; | ||
1589 | |||
1590 | noop: | ||
1591 | return skb; | ||
1592 | |||
1593 | fallback: | ||
1594 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); | ||
1595 | return NULL; | ||
1351 | } | 1596 | } |
1352 | 1597 | ||
1353 | static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | 1598 | static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, |
1354 | struct tcp_sack_block *next_dup, | 1599 | struct tcp_sack_block *next_dup, |
1600 | struct tcp_sacktag_state *state, | ||
1355 | u32 start_seq, u32 end_seq, | 1601 | u32 start_seq, u32 end_seq, |
1356 | int dup_sack_in, int *fack_count, | 1602 | int dup_sack_in) |
1357 | int *reord, int *flag) | ||
1358 | { | 1603 | { |
1604 | struct tcp_sock *tp = tcp_sk(sk); | ||
1605 | struct sk_buff *tmp; | ||
1606 | |||
1359 | tcp_for_write_queue_from(skb, sk) { | 1607 | tcp_for_write_queue_from(skb, sk) { |
1360 | int in_sack = 0; | 1608 | int in_sack = 0; |
1361 | int dup_sack = dup_sack_in; | 1609 | int dup_sack = dup_sack_in; |
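The partial-shift sizing at the end of tcp_shift_skb_data() above snaps the shifted span to a whole number of MSS-sized segments so tcp_skb_pcount() bookkeeping stays exact. A hedged sketch of just that decision:

/* Returns the segment count for a shift of *len bytes, adjusting
 * *len down to an MSS multiple; 0 means "don't shift" (noop case).
 */
static unsigned int shift_pcount(unsigned int *len, unsigned int mss)
{
	if (*len == mss)
		return 1;			/* exactly one segment */
	if (*len < mss)
		return 0;			/* sub-MSS tail: noop */
	*len = (*len / mss) * mss;		/* round down to MSS multiple */
	return *len / mss;
}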
@@ -1376,17 +1624,42 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
1376 | dup_sack = 1; | 1624 | dup_sack = 1; |
1377 | } | 1625 | } |
1378 | 1626 | ||
1379 | if (in_sack <= 0) | 1627 | /* skb reference here is a bit tricky to get right, since |
1380 | in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, | 1628 | * shifting can eat and free both this skb and the next, |
1381 | end_seq); | 1629 | * so not even the _safe variant of the loop is enough. |
1630 | */ | ||
1631 | if (in_sack <= 0) { | ||
1632 | tmp = tcp_shift_skb_data(sk, skb, state, | ||
1633 | start_seq, end_seq, dup_sack); | ||
1634 | if (tmp != NULL) { | ||
1635 | if (tmp != skb) { | ||
1636 | skb = tmp; | ||
1637 | continue; | ||
1638 | } | ||
1639 | |||
1640 | in_sack = 0; | ||
1641 | } else { | ||
1642 | in_sack = tcp_match_skb_to_sack(sk, skb, | ||
1643 | start_seq, | ||
1644 | end_seq); | ||
1645 | } | ||
1646 | } | ||
1647 | |||
1382 | if (unlikely(in_sack < 0)) | 1648 | if (unlikely(in_sack < 0)) |
1383 | break; | 1649 | break; |
1384 | 1650 | ||
1385 | if (in_sack) | 1651 | if (in_sack) { |
1386 | *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, | 1652 | TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, |
1387 | *fack_count); | 1653 | state, |
1654 | dup_sack, | ||
1655 | tcp_skb_pcount(skb)); | ||
1656 | |||
1657 | if (!before(TCP_SKB_CB(skb)->seq, | ||
1658 | tcp_highest_sack_seq(tp))) | ||
1659 | tcp_advance_highest_sack(sk, skb); | ||
1660 | } | ||
1388 | 1661 | ||
1389 | *fack_count += tcp_skb_pcount(skb); | 1662 | state->fack_count += tcp_skb_pcount(skb); |
1390 | } | 1663 | } |
1391 | return skb; | 1664 | return skb; |
1392 | } | 1665 | } |
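The comment in this hunk about the _safe loop variant deserves a gloss: tcp_shift_skb_data() can free both the current skb and its successor, so the walker resumes from whatever node the helper hands back rather than from a pre-fetched next pointer. A generic sketch of that pattern (try_collapse() is a hypothetical stand-in that here never collapses):

#include <stddef.h>

struct node {
	struct node *next;
};

/* Hypothetical stand-in for tcp_shift_skb_data(): may consume nodes
 * and return the node to resume from, or NULL meaning "no change".
 */
static struct node *try_collapse(struct node *n)
{
	return NULL;
}

static void walk(struct node *head)
{
	struct node *n = head;

	while (n) {
		struct node *resume = try_collapse(n);

		if (resume && resume != n) {
			n = resume;	/* nodes were consumed: resume there */
			continue;
		}
		n = n->next;		/* nothing freed: plain advance */
	}
}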
@@ -1395,16 +1668,17 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
1395 | * a normal way | 1668 | * a normal way |
1396 | */ | 1669 | */ |
1397 | static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, | 1670 | static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, |
1398 | u32 skip_to_seq, int *fack_count) | 1671 | struct tcp_sacktag_state *state, |
1672 | u32 skip_to_seq) | ||
1399 | { | 1673 | { |
1400 | tcp_for_write_queue_from(skb, sk) { | 1674 | tcp_for_write_queue_from(skb, sk) { |
1401 | if (skb == tcp_send_head(sk)) | 1675 | if (skb == tcp_send_head(sk)) |
1402 | break; | 1676 | break; |
1403 | 1677 | ||
1404 | if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) | 1678 | if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) |
1405 | break; | 1679 | break; |
1406 | 1680 | ||
1407 | *fack_count += tcp_skb_pcount(skb); | 1681 | state->fack_count += tcp_skb_pcount(skb); |
1408 | } | 1682 | } |
1409 | return skb; | 1683 | return skb; |
1410 | } | 1684 | } |
@@ -1412,18 +1686,17 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, | |||
1412 | static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, | 1686 | static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, |
1413 | struct sock *sk, | 1687 | struct sock *sk, |
1414 | struct tcp_sack_block *next_dup, | 1688 | struct tcp_sack_block *next_dup, |
1415 | u32 skip_to_seq, | 1689 | struct tcp_sacktag_state *state, |
1416 | int *fack_count, int *reord, | 1690 | u32 skip_to_seq) |
1417 | int *flag) | ||
1418 | { | 1691 | { |
1419 | if (next_dup == NULL) | 1692 | if (next_dup == NULL) |
1420 | return skb; | 1693 | return skb; |
1421 | 1694 | ||
1422 | if (before(next_dup->start_seq, skip_to_seq)) { | 1695 | if (before(next_dup->start_seq, skip_to_seq)) { |
1423 | skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count); | 1696 | skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq); |
1424 | skb = tcp_sacktag_walk(skb, sk, NULL, | 1697 | skb = tcp_sacktag_walk(skb, sk, NULL, state, |
1425 | next_dup->start_seq, next_dup->end_seq, | 1698 | next_dup->start_seq, next_dup->end_seq, |
1426 | 1, fack_count, reord, flag); | 1699 | 1); |
1427 | } | 1700 | } |
1428 | 1701 | ||
1429 | return skb; | 1702 | return skb; |
@@ -1445,16 +1718,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, | |||
1445 | struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); | 1718 | struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); |
1446 | struct tcp_sack_block sp[TCP_NUM_SACKS]; | 1719 | struct tcp_sack_block sp[TCP_NUM_SACKS]; |
1447 | struct tcp_sack_block *cache; | 1720 | struct tcp_sack_block *cache; |
1721 | struct tcp_sacktag_state state; | ||
1448 | struct sk_buff *skb; | 1722 | struct sk_buff *skb; |
1449 | int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); | 1723 | int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); |
1450 | int used_sacks; | 1724 | int used_sacks; |
1451 | int reord = tp->packets_out; | ||
1452 | int flag = 0; | ||
1453 | int found_dup_sack = 0; | 1725 | int found_dup_sack = 0; |
1454 | int fack_count; | ||
1455 | int i, j; | 1726 | int i, j; |
1456 | int first_sack_index; | 1727 | int first_sack_index; |
1457 | 1728 | ||
1729 | state.flag = 0; | ||
1730 | state.reord = tp->packets_out; | ||
1731 | |||
1458 | if (!tp->sacked_out) { | 1732 | if (!tp->sacked_out) { |
1459 | if (WARN_ON(tp->fackets_out)) | 1733 | if (WARN_ON(tp->fackets_out)) |
1460 | tp->fackets_out = 0; | 1734 | tp->fackets_out = 0; |
@@ -1464,7 +1738,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, | |||
1464 | found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, | 1738 | found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, |
1465 | num_sacks, prior_snd_una); | 1739 | num_sacks, prior_snd_una); |
1466 | if (found_dup_sack) | 1740 | if (found_dup_sack) |
1467 | flag |= FLAG_DSACKING_ACK; | 1741 | state.flag |= FLAG_DSACKING_ACK; |
1468 | 1742 | ||
1469 | /* Eliminate too old ACKs, but take into | 1743 | /* Eliminate too old ACKs, but take into |
1470 | * account more or less fresh ones, they can | 1744 | * account more or less fresh ones, they can |
@@ -1533,7 +1807,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, | |||
1533 | } | 1807 | } |
1534 | 1808 | ||
1535 | skb = tcp_write_queue_head(sk); | 1809 | skb = tcp_write_queue_head(sk); |
1536 | fack_count = 0; | 1810 | state.fack_count = 0; |
1537 | i = 0; | 1811 | i = 0; |
1538 | 1812 | ||
1539 | if (!tp->sacked_out) { | 1813 | if (!tp->sacked_out) { |
@@ -1558,7 +1832,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, | |||
1558 | 1832 | ||
1559 | /* Event "B" in the comment above. */ | 1833 | /* Event "B" in the comment above. */ |
1560 | if (after(end_seq, tp->high_seq)) | 1834 | if (after(end_seq, tp->high_seq)) |
1561 | flag |= FLAG_DATA_LOST; | 1835 | state.flag |= FLAG_DATA_LOST; |
1562 | 1836 | ||
1563 | /* Skip too early cached blocks */ | 1837 | /* Skip too early cached blocks */ |
1564 | while (tcp_sack_cache_ok(tp, cache) && | 1838 | while (tcp_sack_cache_ok(tp, cache) && |
@@ -1571,13 +1845,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, | |||
1571 | 1845 | ||
1572 | /* Head todo? */ | 1846 | /* Head todo? */ |
1573 | if (before(start_seq, cache->start_seq)) { | 1847 | if (before(start_seq, cache->start_seq)) { |
1574 | skb = tcp_sacktag_skip(skb, sk, start_seq, | 1848 | skb = tcp_sacktag_skip(skb, sk, &state, |
1575 | &fack_count); | 1849 | start_seq); |
1576 | skb = tcp_sacktag_walk(skb, sk, next_dup, | 1850 | skb = tcp_sacktag_walk(skb, sk, next_dup, |
1851 | &state, | ||
1577 | start_seq, | 1852 | start_seq, |
1578 | cache->start_seq, | 1853 | cache->start_seq, |
1579 | dup_sack, &fack_count, | 1854 | dup_sack); |
1580 | &reord, &flag); | ||
1581 | } | 1855 | } |
1582 | 1856 | ||
1583 | /* Rest of the block already fully processed? */ | 1857 | /* Rest of the block already fully processed? */ |
@@ -1585,9 +1859,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, | |||
1585 | goto advance_sp; | 1859 | goto advance_sp; |
1586 | 1860 | ||
1587 | skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, | 1861 | skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, |
1588 | cache->end_seq, | 1862 | &state, |
1589 | &fack_count, &reord, | 1863 | cache->end_seq); |
1590 | &flag); | ||
1591 | 1864 | ||
1592 | /* ...tail remains todo... */ | 1865 | /* ...tail remains todo... */ |
1593 | if (tcp_highest_sack_seq(tp) == cache->end_seq) { | 1866 | if (tcp_highest_sack_seq(tp) == cache->end_seq) { |
@@ -1595,13 +1868,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, | |||
1595 | skb = tcp_highest_sack(sk); | 1868 | skb = tcp_highest_sack(sk); |
1596 | if (skb == NULL) | 1869 | if (skb == NULL) |
1597 | break; | 1870 | break; |
1598 | fack_count = tp->fackets_out; | 1871 | state.fack_count = tp->fackets_out; |
1599 | cache++; | 1872 | cache++; |
1600 | goto walk; | 1873 | goto walk; |
1601 | } | 1874 | } |
1602 | 1875 | ||
1603 | skb = tcp_sacktag_skip(skb, sk, cache->end_seq, | 1876 | skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq); |
1604 | &fack_count); | ||
1605 | /* Check overlap against next cached too (past this one already) */ | 1877 | /* Check overlap against next cached too (past this one already) */ |
1606 | cache++; | 1878 | cache++; |
1607 | continue; | 1879 | continue; |
@@ -1611,20 +1883,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, | |||
1611 | skb = tcp_highest_sack(sk); | 1883 | skb = tcp_highest_sack(sk); |
1612 | if (skb == NULL) | 1884 | if (skb == NULL) |
1613 | break; | 1885 | break; |
1614 | fack_count = tp->fackets_out; | 1886 | state.fack_count = tp->fackets_out; |
1615 | } | 1887 | } |
1616 | skb = tcp_sacktag_skip(skb, sk, start_seq, &fack_count); | 1888 | skb = tcp_sacktag_skip(skb, sk, &state, start_seq); |
1617 | 1889 | ||
1618 | walk: | 1890 | walk: |
1619 | skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq, | 1891 | skb = tcp_sacktag_walk(skb, sk, next_dup, &state, |
1620 | dup_sack, &fack_count, &reord, &flag); | 1892 | start_seq, end_seq, dup_sack); |
1621 | 1893 | ||
1622 | advance_sp: | 1894 | advance_sp: |
1623 | /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct | 1895 | /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct |
1624 | * due to in-order walk | 1896 | * due to in-order walk |
1625 | */ | 1897 | */ |
1626 | if (after(end_seq, tp->frto_highmark)) | 1898 | if (after(end_seq, tp->frto_highmark)) |
1627 | flag &= ~FLAG_ONLY_ORIG_SACKED; | 1899 | state.flag &= ~FLAG_ONLY_ORIG_SACKED; |
1628 | 1900 | ||
1629 | i++; | 1901 | i++; |
1630 | } | 1902 | } |
@@ -1641,10 +1913,10 @@ advance_sp: | |||
1641 | 1913 | ||
1642 | tcp_verify_left_out(tp); | 1914 | tcp_verify_left_out(tp); |
1643 | 1915 | ||
1644 | if ((reord < tp->fackets_out) && | 1916 | if ((state.reord < tp->fackets_out) && |
1645 | ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && | 1917 | ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && |
1646 | (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) | 1918 | (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) |
1647 | tcp_update_reordering(sk, tp->fackets_out - reord, 0); | 1919 | tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); |
1648 | 1920 | ||
1649 | out: | 1921 | out: |
1650 | 1922 | ||
@@ -1654,13 +1926,13 @@ out: | |||
1654 | WARN_ON((int)tp->retrans_out < 0); | 1926 | WARN_ON((int)tp->retrans_out < 0); |
1655 | WARN_ON((int)tcp_packets_in_flight(tp) < 0); | 1927 | WARN_ON((int)tcp_packets_in_flight(tp) < 0); |
1656 | #endif | 1928 | #endif |
1657 | return flag; | 1929 | return state.flag; |
1658 | } | 1930 | } |
1659 | 1931 | ||
1660 | /* Limits sacked_out so that sum with lost_out isn't ever larger than | 1932 | /* Limits sacked_out so that sum with lost_out isn't ever larger than |
1661 | * packets_out. Returns zero if sacked_out adjustment wasn't necessary. | 1933 | * packets_out. Returns zero if sacked_out adjustment wasn't necessary. |
1662 | */ | 1934 | */ |
1663 | int tcp_limit_reno_sacked(struct tcp_sock *tp) | 1935 | static int tcp_limit_reno_sacked(struct tcp_sock *tp) |
1664 | { | 1936 | { |
1665 | u32 holes; | 1937 | u32 holes; |
1666 | 1938 | ||
@@ -2336,9 +2608,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
2336 | struct inet_sock *inet = inet_sk(sk); | 2608 | struct inet_sock *inet = inet_sk(sk); |
2337 | 2609 | ||
2338 | if (sk->sk_family == AF_INET) { | 2610 | if (sk->sk_family == AF_INET) { |
2339 | printk(KERN_DEBUG "Undo %s " NIPQUAD_FMT "/%u c%u l%u ss%u/%u p%u\n", | 2611 | printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", |
2340 | msg, | 2612 | msg, |
2341 | NIPQUAD(inet->daddr), ntohs(inet->dport), | 2613 | &inet->daddr, ntohs(inet->dport), |
2342 | tp->snd_cwnd, tcp_left_out(tp), | 2614 | tp->snd_cwnd, tcp_left_out(tp), |
2343 | tp->snd_ssthresh, tp->prior_ssthresh, | 2615 | tp->snd_ssthresh, tp->prior_ssthresh, |
2344 | tp->packets_out); | 2616 | tp->packets_out); |
@@ -2346,9 +2618,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
2346 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 2618 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
2347 | else if (sk->sk_family == AF_INET6) { | 2619 | else if (sk->sk_family == AF_INET6) { |
2348 | struct ipv6_pinfo *np = inet6_sk(sk); | 2620 | struct ipv6_pinfo *np = inet6_sk(sk); |
2349 | printk(KERN_DEBUG "Undo %s " NIP6_FMT "/%u c%u l%u ss%u/%u p%u\n", | 2621 | printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", |
2350 | msg, | 2622 | msg, |
2351 | NIP6(np->daddr), ntohs(inet->dport), | 2623 | &np->daddr, ntohs(inet->dport), |
2352 | tp->snd_cwnd, tcp_left_out(tp), | 2624 | tp->snd_cwnd, tcp_left_out(tp), |
2353 | tp->snd_ssthresh, tp->prior_ssthresh, | 2625 | tp->snd_ssthresh, tp->prior_ssthresh, |
2354 | tp->packets_out); | 2626 | tp->packets_out); |
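The DBGUNDO conversions above use the %pI4/%pI6 printk extensions, which take a pointer to an address in network byte order instead of the four (or sixteen) byte-sized arguments the NIPQUAD()/NIP6() macros expanded to. A minimal kernel-context illustration (print_peer() is hypothetical):

#include <linux/kernel.h>
#include <linux/in6.h>

/* Hypothetical helper: %pI4 takes a pointer to a __be32 and %pI6 a
 * pointer to a struct in6_addr, both in network byte order.
 */
static void print_peer(const __be32 *daddr, const struct in6_addr *daddr6)
{
	printk(KERN_DEBUG "v4 peer %pI4\n", daddr);
	printk(KERN_DEBUG "v6 peer %pI6\n", daddr6);
}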
@@ -2559,6 +2831,56 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) | |||
2559 | tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); | 2831 | tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); |
2560 | } | 2832 | } |
2561 | 2833 | ||
2834 | /* Do a simple retransmit without using the backoff mechanisms in | ||
2835 | * tcp_timer. This is used for path mtu discovery. | ||
2836 | * The socket is already locked here. | ||
2837 | */ | ||
2838 | void tcp_simple_retransmit(struct sock *sk) | ||
2839 | { | ||
2840 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
2841 | struct tcp_sock *tp = tcp_sk(sk); | ||
2842 | struct sk_buff *skb; | ||
2843 | unsigned int mss = tcp_current_mss(sk, 0); | ||
2844 | u32 prior_lost = tp->lost_out; | ||
2845 | |||
2846 | tcp_for_write_queue(skb, sk) { | ||
2847 | if (skb == tcp_send_head(sk)) | ||
2848 | break; | ||
2849 | if (tcp_skb_seglen(skb) > mss && | ||
2850 | !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { | ||
2851 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { | ||
2852 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | ||
2853 | tp->retrans_out -= tcp_skb_pcount(skb); | ||
2854 | } | ||
2855 | tcp_skb_mark_lost_uncond_verify(tp, skb); | ||
2856 | } | ||
2857 | } | ||
2858 | |||
2859 | tcp_clear_retrans_hints_partial(tp); | ||
2860 | |||
2861 | if (prior_lost == tp->lost_out) | ||
2862 | return; | ||
2863 | |||
2864 | if (tcp_is_reno(tp)) | ||
2865 | tcp_limit_reno_sacked(tp); | ||
2866 | |||
2867 | tcp_verify_left_out(tp); | ||
2868 | |||
2869 | /* Don't muck with the congestion window here. | ||
2870 | * Reason is that we do not increase amount of _data_ | ||
2871 | * in network, but units changed and effective | ||
2872 | * cwnd/ssthresh really reduced now. | ||
2873 | */ | ||
2874 | if (icsk->icsk_ca_state != TCP_CA_Loss) { | ||
2875 | tp->high_seq = tp->snd_nxt; | ||
2876 | tp->snd_ssthresh = tcp_current_ssthresh(sk); | ||
2877 | tp->prior_ssthresh = 0; | ||
2878 | tp->undo_marker = 0; | ||
2879 | tcp_set_ca_state(sk, TCP_CA_Loss); | ||
2880 | } | ||
2881 | tcp_xmit_retransmit_queue(sk); | ||
2882 | } | ||
2883 | |||
2562 | /* Process an event, which can update packets-in-flight not trivially. | 2884 | /* Process an event, which can update packets-in-flight not trivially. |
2563 | * Main goal of this function is to calculate new estimate for left_out, | 2885 | * Main goal of this function is to calculate new estimate for left_out, |
2564 | * taking into account both packets sitting in receiver's buffer and | 2886 | * taking into account both packets sitting in receiver's buffer and |
@@ -2730,6 +3052,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) | |||
2730 | tcp_xmit_retransmit_queue(sk); | 3052 | tcp_xmit_retransmit_queue(sk); |
2731 | } | 3053 | } |
2732 | 3054 | ||
3055 | static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) | ||
3056 | { | ||
3057 | tcp_rtt_estimator(sk, seq_rtt); | ||
3058 | tcp_set_rto(sk); | ||
3059 | inet_csk(sk)->icsk_backoff = 0; | ||
3060 | } | ||
3061 | |||
2733 | /* Read draft-ietf-tcplw-high-performance before mucking | 3062 | /* Read draft-ietf-tcplw-high-performance before mucking |
2734 | * with this code. (Supersedes RFC1323) | 3063 | * with this code. (Supersedes RFC1323) |
2735 | */ | 3064 | */ |
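For context on the tcp_valid_rtt_meas() consolidation above: each valid measurement feeds the Jacobson/Karels estimator, recomputes the RTO (the old tcp_bound_rto() clamp to TCP_RTO_MAX is now folded into tcp_set_rto()), and clears the backoff. A floating-point userspace sketch of that estimator, not the kernel's fixed-point code:

/* RFC 2988-style estimator behind tcp_rtt_estimator()/tcp_set_rto();
 * the kernel keeps srtt scaled by 8 and mdev by 4, this uses seconds.
 */
struct rtt_est {
	double srtt;
	double rttvar;
};

static double update_rto(struct rtt_est *e, double r)	/* r: measured RTT */
{
	double rto;

	if (e->srtt == 0.0) {			/* first measurement */
		e->srtt = r;
		e->rttvar = r / 2.0;
	} else {
		double err = r - e->srtt;

		e->rttvar += 0.25 * ((err < 0.0 ? -err : err) - e->rttvar);
		e->srtt += 0.125 * err;
	}
	rto = e->srtt + 4.0 * e->rttvar;
	return rto > 120.0 ? 120.0 : rto;	/* TCP_RTO_MAX is 120 s */
}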
@@ -2751,11 +3080,8 @@ static void tcp_ack_saw_tstamp(struct sock *sk, int flag) | |||
2751 | * in window is lost... Voila. --ANK (010210) | 3080 | * in window is lost... Voila. --ANK (010210) |
2752 | */ | 3081 | */ |
2753 | struct tcp_sock *tp = tcp_sk(sk); | 3082 | struct tcp_sock *tp = tcp_sk(sk); |
2754 | const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; | 3083 | |
2755 | tcp_rtt_estimator(sk, seq_rtt); | 3084 | tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr); |
2756 | tcp_set_rto(sk); | ||
2757 | inet_csk(sk)->icsk_backoff = 0; | ||
2758 | tcp_bound_rto(sk); | ||
2759 | } | 3085 | } |
2760 | 3086 | ||
2761 | static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) | 3087 | static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) |
@@ -2772,10 +3098,7 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) | |||
2772 | if (flag & FLAG_RETRANS_DATA_ACKED) | 3098 | if (flag & FLAG_RETRANS_DATA_ACKED) |
2773 | return; | 3099 | return; |
2774 | 3100 | ||
2775 | tcp_rtt_estimator(sk, seq_rtt); | 3101 | tcp_valid_rtt_meas(sk, seq_rtt); |
2776 | tcp_set_rto(sk); | ||
2777 | inet_csk(sk)->icsk_backoff = 0; | ||
2778 | tcp_bound_rto(sk); | ||
2779 | } | 3102 | } |
2780 | 3103 | ||
2781 | static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, | 3104 | static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5c8fa7f1e327..10172487921b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -97,11 +97,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) | |||
97 | } | 97 | } |
98 | #endif | 98 | #endif |
99 | 99 | ||
100 | struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { | 100 | struct inet_hashinfo tcp_hashinfo; |
101 | .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), | ||
102 | .lhash_users = ATOMIC_INIT(0), | ||
103 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), | ||
104 | }; | ||
105 | 101 | ||
106 | static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) | 102 | static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) |
107 | { | 103 | { |
@@ -492,7 +488,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) | |||
492 | skb->csum_offset = offsetof(struct tcphdr, check); | 488 | skb->csum_offset = offsetof(struct tcphdr, check); |
493 | } else { | 489 | } else { |
494 | th->check = tcp_v4_check(len, inet->saddr, inet->daddr, | 490 | th->check = tcp_v4_check(len, inet->saddr, inet->daddr, |
495 | csum_partial((char *)th, | 491 | csum_partial(th, |
496 | th->doff << 2, | 492 | th->doff << 2, |
497 | skb->csum)); | 493 | skb->csum)); |
498 | } | 494 | } |
@@ -726,7 +722,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | |||
726 | th->check = tcp_v4_check(skb->len, | 722 | th->check = tcp_v4_check(skb->len, |
727 | ireq->loc_addr, | 723 | ireq->loc_addr, |
728 | ireq->rmt_addr, | 724 | ireq->rmt_addr, |
729 | csum_partial((char *)th, skb->len, | 725 | csum_partial(th, skb->len, |
730 | skb->csum)); | 726 | skb->csum)); |
731 | 727 | ||
732 | err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, | 728 | err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, |
@@ -1139,10 +1135,9 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) | |||
1139 | 1135 | ||
1140 | if (genhash || memcmp(hash_location, newhash, 16) != 0) { | 1136 | if (genhash || memcmp(hash_location, newhash, 16) != 0) { |
1141 | if (net_ratelimit()) { | 1137 | if (net_ratelimit()) { |
1142 | printk(KERN_INFO "MD5 Hash failed for " | 1138 | printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", |
1143 | "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n", | 1139 | &iph->saddr, ntohs(th->source), |
1144 | NIPQUAD(iph->saddr), ntohs(th->source), | 1140 | &iph->daddr, ntohs(th->dest), |
1145 | NIPQUAD(iph->daddr), ntohs(th->dest), | ||
1146 | genhash ? " tcp_v4_calc_md5_hash failed" : ""); | 1141 | genhash ? " tcp_v4_calc_md5_hash failed" : ""); |
1147 | } | 1142 | } |
1148 | return 1; | 1143 | return 1; |
@@ -1297,10 +1292,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1297 | * to destinations, already remembered | 1292 | * to destinations, already remembered |
1298 | * to the moment of synflood. | 1293 | * to the moment of synflood. |
1299 | */ | 1294 | */ |
1300 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " | 1295 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n", |
1301 | "request from " NIPQUAD_FMT "/%u\n", | 1296 | &saddr, ntohs(tcp_hdr(skb)->source)); |
1302 | NIPQUAD(saddr), | ||
1303 | ntohs(tcp_hdr(skb)->source)); | ||
1304 | goto drop_and_release; | 1297 | goto drop_and_release; |
1305 | } | 1298 | } |
1306 | 1299 | ||
@@ -1804,7 +1797,7 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
1804 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; | 1797 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; |
1805 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | 1798 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; |
1806 | 1799 | ||
1807 | atomic_inc(&tcp_sockets_allocated); | 1800 | percpu_counter_inc(&tcp_sockets_allocated); |
1808 | 1801 | ||
1809 | return 0; | 1802 | return 0; |
1810 | } | 1803 | } |
@@ -1852,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk) | |||
1852 | sk->sk_sndmsg_page = NULL; | 1845 | sk->sk_sndmsg_page = NULL; |
1853 | } | 1846 | } |
1854 | 1847 | ||
1855 | atomic_dec(&tcp_sockets_allocated); | 1848 | percpu_counter_dec(&tcp_sockets_allocated); |
1856 | } | 1849 | } |
1857 | 1850 | ||
1858 | EXPORT_SYMBOL(tcp_v4_destroy_sock); | 1851 | EXPORT_SYMBOL(tcp_v4_destroy_sock); |
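The counter change above swaps a globally bounced atomic for a per-CPU counter: updates touch only a local delta, and readers sum (or read a cached approximation of) the per-CPU parts. A hedged sketch of the API as it stood around this kernel; the counter name is illustrative:

#include <linux/init.h>
#include <linux/percpu_counter.h>

static struct percpu_counter sockets_allocated;

static int __init counter_setup(void)
{
	return percpu_counter_init(&sockets_allocated, 0);
}

/* Cheap, contention-free updates on the hot path... */
static void on_create(void)
{
	percpu_counter_inc(&sockets_allocated);
}

static void on_destroy(void)
{
	percpu_counter_dec(&sockets_allocated);
}

/* ...and an approximate read (percpu_counter_sum() would be exact). */
static s64 count_now(void)
{
	return percpu_counter_read_positive(&sockets_allocated);
}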
@@ -1860,32 +1853,35 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); | |||
1860 | #ifdef CONFIG_PROC_FS | 1853 | #ifdef CONFIG_PROC_FS |
1861 | /* Proc filesystem TCP sock list dumping. */ | 1854 | /* Proc filesystem TCP sock list dumping. */ |
1862 | 1855 | ||
1863 | static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) | 1856 | static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) |
1864 | { | 1857 | { |
1865 | return hlist_empty(head) ? NULL : | 1858 | return hlist_nulls_empty(head) ? NULL : |
1866 | list_entry(head->first, struct inet_timewait_sock, tw_node); | 1859 | list_entry(head->first, struct inet_timewait_sock, tw_node); |
1867 | } | 1860 | } |
1868 | 1861 | ||
1869 | static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) | 1862 | static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) |
1870 | { | 1863 | { |
1871 | return tw->tw_node.next ? | 1864 | return !is_a_nulls(tw->tw_node.next) ? |
1872 | hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; | 1865 | hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; |
1873 | } | 1866 | } |
1874 | 1867 | ||
1875 | static void *listening_get_next(struct seq_file *seq, void *cur) | 1868 | static void *listening_get_next(struct seq_file *seq, void *cur) |
1876 | { | 1869 | { |
1877 | struct inet_connection_sock *icsk; | 1870 | struct inet_connection_sock *icsk; |
1878 | struct hlist_node *node; | 1871 | struct hlist_nulls_node *node; |
1879 | struct sock *sk = cur; | 1872 | struct sock *sk = cur; |
1880 | struct tcp_iter_state* st = seq->private; | 1873 | struct inet_listen_hashbucket *ilb; |
1874 | struct tcp_iter_state *st = seq->private; | ||
1881 | struct net *net = seq_file_net(seq); | 1875 | struct net *net = seq_file_net(seq); |
1882 | 1876 | ||
1883 | if (!sk) { | 1877 | if (!sk) { |
1884 | st->bucket = 0; | 1878 | st->bucket = 0; |
1885 | sk = sk_head(&tcp_hashinfo.listening_hash[0]); | 1879 | ilb = &tcp_hashinfo.listening_hash[0]; |
1880 | spin_lock_bh(&ilb->lock); | ||
1881 | sk = sk_nulls_head(&ilb->head); | ||
1886 | goto get_sk; | 1882 | goto get_sk; |
1887 | } | 1883 | } |
1888 | 1884 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; | |
1889 | ++st->num; | 1885 | ++st->num; |
1890 | 1886 | ||
1891 | if (st->state == TCP_SEQ_STATE_OPENREQ) { | 1887 | if (st->state == TCP_SEQ_STATE_OPENREQ) { |
@@ -1918,7 +1914,7 @@ get_req: | |||
1918 | sk = sk_next(sk); | 1914 | sk = sk_next(sk); |
1919 | } | 1915 | } |
1920 | get_sk: | 1916 | get_sk: |
1921 | sk_for_each_from(sk, node) { | 1917 | sk_nulls_for_each_from(sk, node) { |
1922 | if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { | 1918 | if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { |
1923 | cur = sk; | 1919 | cur = sk; |
1924 | goto out; | 1920 | goto out; |
@@ -1935,8 +1931,11 @@ start_req: | |||
1935 | } | 1931 | } |
1936 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); | 1932 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
1937 | } | 1933 | } |
1934 | spin_unlock_bh(&ilb->lock); | ||
1938 | if (++st->bucket < INET_LHTABLE_SIZE) { | 1935 | if (++st->bucket < INET_LHTABLE_SIZE) { |
1939 | sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); | 1936 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; |
1937 | spin_lock_bh(&ilb->lock); | ||
1938 | sk = sk_nulls_head(&ilb->head); | ||
1940 | goto get_sk; | 1939 | goto get_sk; |
1941 | } | 1940 | } |
1942 | cur = NULL; | 1941 | cur = NULL; |
@@ -1957,28 +1956,28 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) | |||
1957 | 1956 | ||
1958 | static inline int empty_bucket(struct tcp_iter_state *st) | 1957 | static inline int empty_bucket(struct tcp_iter_state *st) |
1959 | { | 1958 | { |
1960 | return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && | 1959 | return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && |
1961 | hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); | 1960 | hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); |
1962 | } | 1961 | } |
1963 | 1962 | ||
1964 | static void *established_get_first(struct seq_file *seq) | 1963 | static void *established_get_first(struct seq_file *seq) |
1965 | { | 1964 | { |
1966 | struct tcp_iter_state* st = seq->private; | 1965 | struct tcp_iter_state *st = seq->private; |
1967 | struct net *net = seq_file_net(seq); | 1966 | struct net *net = seq_file_net(seq); |
1968 | void *rc = NULL; | 1967 | void *rc = NULL; |
1969 | 1968 | ||
1970 | for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { | 1969 | for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { |
1971 | struct sock *sk; | 1970 | struct sock *sk; |
1972 | struct hlist_node *node; | 1971 | struct hlist_nulls_node *node; |
1973 | struct inet_timewait_sock *tw; | 1972 | struct inet_timewait_sock *tw; |
1974 | rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); | 1973 | spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); |
1975 | 1974 | ||
1976 | /* Lockless fast path for the common case of empty buckets */ | 1975 | /* Lockless fast path for the common case of empty buckets */ |
1977 | if (empty_bucket(st)) | 1976 | if (empty_bucket(st)) |
1978 | continue; | 1977 | continue; |
1979 | 1978 | ||
1980 | read_lock_bh(lock); | 1979 | spin_lock_bh(lock); |
1981 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { | 1980 | sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { |
1982 | if (sk->sk_family != st->family || | 1981 | if (sk->sk_family != st->family || |
1983 | !net_eq(sock_net(sk), net)) { | 1982 | !net_eq(sock_net(sk), net)) { |
1984 | continue; | 1983 | continue; |
@@ -1996,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq) | |||
1996 | rc = tw; | 1995 | rc = tw; |
1997 | goto out; | 1996 | goto out; |
1998 | } | 1997 | } |
1999 | read_unlock_bh(lock); | 1998 | spin_unlock_bh(lock); |
2000 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 1999 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2001 | } | 2000 | } |
2002 | out: | 2001 | out: |
@@ -2007,8 +2006,8 @@ static void *established_get_next(struct seq_file *seq, void *cur) | |||
2007 | { | 2006 | { |
2008 | struct sock *sk = cur; | 2007 | struct sock *sk = cur; |
2009 | struct inet_timewait_sock *tw; | 2008 | struct inet_timewait_sock *tw; |
2010 | struct hlist_node *node; | 2009 | struct hlist_nulls_node *node; |
2011 | struct tcp_iter_state* st = seq->private; | 2010 | struct tcp_iter_state *st = seq->private; |
2012 | struct net *net = seq_file_net(seq); | 2011 | struct net *net = seq_file_net(seq); |
2013 | 2012 | ||
2014 | ++st->num; | 2013 | ++st->num; |
@@ -2024,7 +2023,7 @@ get_tw: | |||
2024 | cur = tw; | 2023 | cur = tw; |
2025 | goto out; | 2024 | goto out; |
2026 | } | 2025 | } |
2027 | read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | 2026 | spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); |
2028 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2027 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2029 | 2028 | ||
2030 | /* Look for next non empty bucket */ | 2029 | /* Look for next non empty bucket */ |
@@ -2034,12 +2033,12 @@ get_tw: | |||
2034 | if (st->bucket >= tcp_hashinfo.ehash_size) | 2033 | if (st->bucket >= tcp_hashinfo.ehash_size) |
2035 | return NULL; | 2034 | return NULL; |
2036 | 2035 | ||
2037 | read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | 2036 | spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); |
2038 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); | 2037 | sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); |
2039 | } else | 2038 | } else |
2040 | sk = sk_next(sk); | 2039 | sk = sk_nulls_next(sk); |
2041 | 2040 | ||
2042 | sk_for_each_from(sk, node) { | 2041 | sk_nulls_for_each_from(sk, node) { |
2043 | if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) | 2042 | if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) |
2044 | goto found; | 2043 | goto found; |
2045 | } | 2044 | } |
@@ -2067,14 +2066,12 @@ static void *established_get_idx(struct seq_file *seq, loff_t pos) | |||
2067 | static void *tcp_get_idx(struct seq_file *seq, loff_t pos) | 2066 | static void *tcp_get_idx(struct seq_file *seq, loff_t pos) |
2068 | { | 2067 | { |
2069 | void *rc; | 2068 | void *rc; |
2070 | struct tcp_iter_state* st = seq->private; | 2069 | struct tcp_iter_state *st = seq->private; |
2071 | 2070 | ||
2072 | inet_listen_lock(&tcp_hashinfo); | ||
2073 | st->state = TCP_SEQ_STATE_LISTENING; | 2071 | st->state = TCP_SEQ_STATE_LISTENING; |
2074 | rc = listening_get_idx(seq, &pos); | 2072 | rc = listening_get_idx(seq, &pos); |
2075 | 2073 | ||
2076 | if (!rc) { | 2074 | if (!rc) { |
2077 | inet_listen_unlock(&tcp_hashinfo); | ||
2078 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2075 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2079 | rc = established_get_idx(seq, pos); | 2076 | rc = established_get_idx(seq, pos); |
2080 | } | 2077 | } |
@@ -2084,7 +2081,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) | |||
2084 | 2081 | ||
2085 | static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) | 2082 | static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) |
2086 | { | 2083 | { |
2087 | struct tcp_iter_state* st = seq->private; | 2084 | struct tcp_iter_state *st = seq->private; |
2088 | st->state = TCP_SEQ_STATE_LISTENING; | 2085 | st->state = TCP_SEQ_STATE_LISTENING; |
2089 | st->num = 0; | 2086 | st->num = 0; |
2090 | return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; | 2087 | return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; |
@@ -2093,7 +2090,7 @@ static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) | |||
2093 | static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2090 | static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2094 | { | 2091 | { |
2095 | void *rc = NULL; | 2092 | void *rc = NULL; |
2096 | struct tcp_iter_state* st; | 2093 | struct tcp_iter_state *st; |
2097 | 2094 | ||
2098 | if (v == SEQ_START_TOKEN) { | 2095 | if (v == SEQ_START_TOKEN) { |
2099 | rc = tcp_get_idx(seq, 0); | 2096 | rc = tcp_get_idx(seq, 0); |
@@ -2106,7 +2103,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2106 | case TCP_SEQ_STATE_LISTENING: | 2103 | case TCP_SEQ_STATE_LISTENING: |
2107 | rc = listening_get_next(seq, v); | 2104 | rc = listening_get_next(seq, v); |
2108 | if (!rc) { | 2105 | if (!rc) { |
2109 | inet_listen_unlock(&tcp_hashinfo); | ||
2110 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2106 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2111 | rc = established_get_first(seq); | 2107 | rc = established_get_first(seq); |
2112 | } | 2108 | } |
@@ -2123,7 +2119,7 @@ out: | |||
2123 | 2119 | ||
2124 | static void tcp_seq_stop(struct seq_file *seq, void *v) | 2120 | static void tcp_seq_stop(struct seq_file *seq, void *v) |
2125 | { | 2121 | { |
2126 | struct tcp_iter_state* st = seq->private; | 2122 | struct tcp_iter_state *st = seq->private; |
2127 | 2123 | ||
2128 | switch (st->state) { | 2124 | switch (st->state) { |
2129 | case TCP_SEQ_STATE_OPENREQ: | 2125 | case TCP_SEQ_STATE_OPENREQ: |
@@ -2133,12 +2129,12 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) | |||
2133 | } | 2129 | } |
2134 | case TCP_SEQ_STATE_LISTENING: | 2130 | case TCP_SEQ_STATE_LISTENING: |
2135 | if (v != SEQ_START_TOKEN) | 2131 | if (v != SEQ_START_TOKEN) |
2136 | inet_listen_unlock(&tcp_hashinfo); | 2132 | spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); |
2137 | break; | 2133 | break; |
2138 | case TCP_SEQ_STATE_TIME_WAIT: | 2134 | case TCP_SEQ_STATE_TIME_WAIT: |
2139 | case TCP_SEQ_STATE_ESTABLISHED: | 2135 | case TCP_SEQ_STATE_ESTABLISHED: |
2140 | if (v) | 2136 | if (v) |
2141 | read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | 2137 | spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); |
2142 | break; | 2138 | break; |
2143 | } | 2139 | } |
2144 | } | 2140 | } |
@@ -2284,7 +2280,7 @@ static void get_timewait4_sock(struct inet_timewait_sock *tw, | |||
2284 | 2280 | ||
2285 | static int tcp4_seq_show(struct seq_file *seq, void *v) | 2281 | static int tcp4_seq_show(struct seq_file *seq, void *v) |
2286 | { | 2282 | { |
2287 | struct tcp_iter_state* st; | 2283 | struct tcp_iter_state *st; |
2288 | int len; | 2284 | int len; |
2289 | 2285 | ||
2290 | if (v == SEQ_START_TOKEN) { | 2286 | if (v == SEQ_START_TOKEN) { |
@@ -2350,6 +2346,41 @@ void tcp4_proc_exit(void) | |||
2350 | } | 2346 | } |
2351 | #endif /* CONFIG_PROC_FS */ | 2347 | #endif /* CONFIG_PROC_FS */ |
2352 | 2348 | ||
2349 | struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) | ||
2350 | { | ||
2351 | struct iphdr *iph = ip_hdr(skb); | ||
2352 | |||
2353 | switch (skb->ip_summed) { | ||
2354 | case CHECKSUM_COMPLETE: | ||
2355 | if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, | ||
2356 | skb->csum)) { | ||
2357 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
2358 | break; | ||
2359 | } | ||
2360 | |||
2361 | /* fall through */ | ||
2362 | case CHECKSUM_NONE: | ||
2363 | NAPI_GRO_CB(skb)->flush = 1; | ||
2364 | return NULL; | ||
2365 | } | ||
2366 | |||
2367 | return tcp_gro_receive(head, skb); | ||
2368 | } | ||
2369 | EXPORT_SYMBOL(tcp4_gro_receive); | ||
2370 | |||
2371 | int tcp4_gro_complete(struct sk_buff *skb) | ||
2372 | { | ||
2373 | struct iphdr *iph = ip_hdr(skb); | ||
2374 | struct tcphdr *th = tcp_hdr(skb); | ||
2375 | |||
2376 | th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), | ||
2377 | iph->saddr, iph->daddr, 0); | ||
2378 | skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; | ||
2379 | |||
2380 | return tcp_gro_complete(skb); | ||
2381 | } | ||
2382 | EXPORT_SYMBOL(tcp4_gro_complete); | ||
2383 | |||
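
tcp4_gro_receive acts as a checksum gate: a CHECKSUM_COMPLETE value is verified against the TCP pseudo-header before the segment may merge, anything unverifiable flushes, and tcp4_gro_complete later rewrites th->check as a pseudo-header-only sum so the merged super-packet looks like locally generated GSO output. For context, a sketch of how these handlers are meant to be wired up, assuming struct net_protocol gained gro_receive/gro_complete members in this series:

	static struct net_protocol tcp_protocol = {
		.handler	= tcp_v4_rcv,
		.err_handler	= tcp_v4_err,
		.gso_send_check	= tcp_v4_gso_send_check,
		.gso_segment	= tcp_tso_segment,
		.gro_receive	= tcp4_gro_receive,	/* checksum gate + flow match */
		.gro_complete	= tcp4_gro_complete,	/* finalize the merged skb */
		.no_policy	= 1,
		.netns_ok	= 1,
	};
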
2353 | struct proto tcp_prot = { | 2384 | struct proto tcp_prot = { |
2354 | .name = "TCP", | 2385 | .name = "TCP", |
2355 | .owner = THIS_MODULE, | 2386 | .owner = THIS_MODULE, |
@@ -2378,6 +2409,7 @@ struct proto tcp_prot = { | |||
2378 | .sysctl_rmem = sysctl_tcp_rmem, | 2409 | .sysctl_rmem = sysctl_tcp_rmem, |
2379 | .max_header = MAX_TCP_HEADER, | 2410 | .max_header = MAX_TCP_HEADER, |
2380 | .obj_size = sizeof(struct tcp_sock), | 2411 | .obj_size = sizeof(struct tcp_sock), |
2412 | .slab_flags = SLAB_DESTROY_BY_RCU, | ||
2381 | .twsk_prot = &tcp_timewait_sock_ops, | 2413 | .twsk_prot = &tcp_timewait_sock_ops, |
2382 | .rsk_prot = &tcp_request_sock_ops, | 2414 | .rsk_prot = &tcp_request_sock_ops, |
2383 | .h.hashinfo = &tcp_hashinfo, | 2415 | .h.hashinfo = &tcp_hashinfo, |
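
SLAB_DESTROY_BY_RCU is weaker than a plain RCU free: only the slab page is grace-period protected, so a struct tcp_sock can be freed and immediately recycled as a different socket while a lockless reader still holds a pointer to it. A bare key comparison is therefore not enough; the required discipline, sketched below with a hypothetical match() predicate, is take-a-reference-then-recheck:

	/* Caller runs under rcu_read_lock() and found sk by hash. */
	static struct sock *lookup_check(struct sock *sk, struct net *net,
					 __be32 saddr, __be16 sport,
					 __be32 daddr, __be16 dport, int dif)
	{
		if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
			return NULL;		/* object is being freed */
		if (unlikely(!match(sk, net, saddr, sport, daddr, dport, dif))) {
			sock_put(sk);		/* recycled under us: drop it */
			return NULL;
		}
		return sk;
	}
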
@@ -2407,6 +2439,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { | |||
2407 | 2439 | ||
2408 | void __init tcp_v4_init(void) | 2440 | void __init tcp_v4_init(void) |
2409 | { | 2441 | { |
2442 | inet_hashinfo_init(&tcp_hashinfo); | ||
2410 | if (register_pernet_device(&tcp_sk_ops)) | 2443 | if (register_pernet_device(&tcp_sk_ops)) |
2411 | panic("Failed to create the TCP control socket.\n"); | 2444 | panic("Failed to create the TCP control socket.\n"); |
2412 | } | 2445 | } |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 779f2e9d0689..f67effbb102b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -491,7 +491,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
491 | * as a request_sock. | 491 | * as a request_sock. |
492 | */ | 492 | */ |
493 | 493 | ||
494 | struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | 494 | struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, |
495 | struct request_sock *req, | 495 | struct request_sock *req, |
496 | struct request_sock **prev) | 496 | struct request_sock **prev) |
497 | { | 497 | { |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fe3b4bdfd251..557fe16cbfb0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -42,7 +42,7 @@ | |||
42 | /* People can turn this off for buggy TCP's found in printers etc. */ | 42 | /* People can turn this off for buggy TCP's found in printers etc. */ |
43 | int sysctl_tcp_retrans_collapse __read_mostly = 1; | 43 | int sysctl_tcp_retrans_collapse __read_mostly = 1; |
44 | 44 | ||
45 | /* People can turn this on to work with those rare, broken TCPs that | 45 | /* People can turn this on to work with those rare, broken TCPs that |
46 | * interpret the window field as a signed quantity. | 46 | * interpret the window field as a signed quantity. |
47 | */ | 47 | */ |
48 | int sysctl_tcp_workaround_signed_windows __read_mostly = 0; | 48 | int sysctl_tcp_workaround_signed_windows __read_mostly = 0; |
@@ -484,7 +484,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
484 | } | 484 | } |
485 | if (likely(sysctl_tcp_window_scaling)) { | 485 | if (likely(sysctl_tcp_window_scaling)) { |
486 | opts->ws = tp->rx_opt.rcv_wscale; | 486 | opts->ws = tp->rx_opt.rcv_wscale; |
487 | if(likely(opts->ws)) | 487 | if (likely(opts->ws)) |
488 | size += TCPOLEN_WSCALE_ALIGNED; | 488 | size += TCPOLEN_WSCALE_ALIGNED; |
489 | } | 489 | } |
490 | if (likely(sysctl_tcp_sack)) { | 490 | if (likely(sysctl_tcp_sack)) { |
@@ -526,7 +526,7 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
526 | 526 | ||
527 | if (likely(ireq->wscale_ok)) { | 527 | if (likely(ireq->wscale_ok)) { |
528 | opts->ws = ireq->rcv_wscale; | 528 | opts->ws = ireq->rcv_wscale; |
529 | if(likely(opts->ws)) | 529 | if (likely(opts->ws)) |
530 | size += TCPOLEN_WSCALE_ALIGNED; | 530 | size += TCPOLEN_WSCALE_ALIGNED; |
531 | } | 531 | } |
532 | if (likely(doing_ts)) { | 532 | if (likely(doing_ts)) { |
@@ -663,10 +663,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
663 | th->urg_ptr = 0; | 663 | th->urg_ptr = 0; |
664 | 664 | ||
665 | /* The urg_mode check is necessary during a below snd_una win probe */ | 665 | /* The urg_mode check is necessary during a below snd_una win probe */ |
666 | if (unlikely(tcp_urg_mode(tp) && | 666 | if (unlikely(tcp_urg_mode(tp))) { |
667 | between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { | 667 | if (between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF)) { |
668 | th->urg_ptr = htons(tp->snd_up - tcb->seq); | 668 | th->urg_ptr = htons(tp->snd_up - tcb->seq); |
669 | th->urg = 1; | 669 | th->urg = 1; |
670 | } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { | ||
671 | th->urg_ptr = 0xFFFF; | ||
672 | th->urg = 1; | ||
673 | } | ||
670 | } | 674 | } |
671 | 675 | ||
672 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); | 676 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); |
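
The 16-bit urgent pointer can only name an octet within 0xFFFF bytes of the segment's sequence number. Previously a connection whose snd_up lay further ahead silently lost the URG flag; the new else-branch pins the pointer at the maximum so the receiver stays in urgent mode until the real urgent point comes within range. An illustrative restatement of the two cases (between() and after() are the usual TCP sequence-space helpers; 0xFFFF is byte-order invariant, hence the bare store in the patch):

	static void set_urg_fields(struct tcphdr *th, u32 seq, u32 snd_up, u32 snd_nxt)
	{
		if (between(snd_up, seq + 1, seq + 0xFFFF)) {
			/* urgent point is addressable from this segment */
			th->urg_ptr = htons(snd_up - seq);
			th->urg = 1;
		} else if (after(seq + 0xFFFF, snd_nxt)) {
			/* too far ahead for 16 bits: advertise the maximum */
			th->urg_ptr = htons(0xFFFF);
			th->urg = 1;
		}
	}
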
@@ -1168,7 +1172,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, | |||
1168 | 1172 | ||
1169 | static inline int tcp_minshall_check(const struct tcp_sock *tp) | 1173 | static inline int tcp_minshall_check(const struct tcp_sock *tp) |
1170 | { | 1174 | { |
1171 | return after(tp->snd_sml,tp->snd_una) && | 1175 | return after(tp->snd_sml, tp->snd_una) && |
1172 | !after(tp->snd_sml, tp->snd_nxt); | 1176 | !after(tp->snd_sml, tp->snd_nxt); |
1173 | } | 1177 | } |
1174 | 1178 | ||
@@ -1334,7 +1338,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
1334 | 1338 | ||
1335 | /* Defer for less than two clock ticks. */ | 1339 | /* Defer for less than two clock ticks. */ |
1336 | if (tp->tso_deferred && | 1340 | if (tp->tso_deferred && |
1337 | ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) | 1341 | (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) |
1338 | goto send_now; | 1342 | goto send_now; |
1339 | 1343 | ||
1340 | in_flight = tcp_packets_in_flight(tp); | 1344 | in_flight = tcp_packets_in_flight(tp); |
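
The (u32) cast fixes a 64-bit-only wrap bug: tp->tso_deferred holds a 31-bit jiffies timestamp tagged in its low bit, and on 64-bit kernels `(jiffies << 1) >> 1` keeps 63 bits instead of stripping down to 31, breaking the modular subtraction near wraparound. A minimal sketch of the intended comparison:

	/* tso_deferred was stored as (jiffies << 1) | 1 when the deferral
	 * began; both operands must be reduced to 31 bits before subtracting. */
	static int deferred_over_two_ticks(u32 tso_deferred)
	{
		u32 now31 = ((u32)jiffies << 1) >> 1;	/* low 31 bits of jiffies */

		return tso_deferred && now31 - (tso_deferred >> 1) > 1;
	}
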
@@ -1519,7 +1523,8 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1519 | * Returns 1, if no segments are in flight and we have queued segments, but | 1523 | * Returns 1, if no segments are in flight and we have queued segments, but |
1520 | * cannot send anything now because of SWS or another problem. | 1524 | * cannot send anything now because of SWS or another problem. |
1521 | */ | 1525 | */ |
1522 | static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | 1526 | static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, |
1527 | int push_one, gfp_t gfp) | ||
1523 | { | 1528 | { |
1524 | struct tcp_sock *tp = tcp_sk(sk); | 1529 | struct tcp_sock *tp = tcp_sk(sk); |
1525 | struct sk_buff *skb; | 1530 | struct sk_buff *skb; |
@@ -1527,20 +1532,16 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
1527 | int cwnd_quota; | 1532 | int cwnd_quota; |
1528 | int result; | 1533 | int result; |
1529 | 1534 | ||
1530 | /* If we are closed, the bytes will have to remain here. | ||
1531 | * In time closedown will finish, we empty the write queue and all | ||
1532 | * will be happy. | ||
1533 | */ | ||
1534 | if (unlikely(sk->sk_state == TCP_CLOSE)) | ||
1535 | return 0; | ||
1536 | |||
1537 | sent_pkts = 0; | 1535 | sent_pkts = 0; |
1538 | 1536 | ||
1539 | /* Do MTU probing. */ | 1537 | if (!push_one) { |
1540 | if ((result = tcp_mtu_probe(sk)) == 0) { | 1538 | /* Do MTU probing. */ |
1541 | return 0; | 1539 | result = tcp_mtu_probe(sk); |
1542 | } else if (result > 0) { | 1540 | if (!result) { |
1543 | sent_pkts = 1; | 1541 | return 0; |
1542 | } else if (result > 0) { | ||
1543 | sent_pkts = 1; | ||
1544 | } | ||
1544 | } | 1545 | } |
1545 | 1546 | ||
1546 | while ((skb = tcp_send_head(sk))) { | 1547 | while ((skb = tcp_send_head(sk))) { |
@@ -1562,7 +1563,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
1562 | nonagle : TCP_NAGLE_PUSH)))) | 1563 | nonagle : TCP_NAGLE_PUSH)))) |
1563 | break; | 1564 | break; |
1564 | } else { | 1565 | } else { |
1565 | if (tcp_tso_should_defer(sk, skb)) | 1566 | if (!push_one && tcp_tso_should_defer(sk, skb)) |
1566 | break; | 1567 | break; |
1567 | } | 1568 | } |
1568 | 1569 | ||
@@ -1577,7 +1578,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
1577 | 1578 | ||
1578 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1579 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
1579 | 1580 | ||
1580 | if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC))) | 1581 | if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) |
1581 | break; | 1582 | break; |
1582 | 1583 | ||
1583 | /* Advance the send_head. This one is sent out. | 1584 | /* Advance the send_head. This one is sent out. |
@@ -1587,6 +1588,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
1587 | 1588 | ||
1588 | tcp_minshall_update(tp, mss_now, skb); | 1589 | tcp_minshall_update(tp, mss_now, skb); |
1589 | sent_pkts++; | 1590 | sent_pkts++; |
1591 | |||
1592 | if (push_one) | ||
1593 | break; | ||
1590 | } | 1594 | } |
1591 | 1595 | ||
1592 | if (likely(sent_pkts)) { | 1596 | if (likely(sent_pkts)) { |
@@ -1605,10 +1609,18 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, | |||
1605 | { | 1609 | { |
1606 | struct sk_buff *skb = tcp_send_head(sk); | 1610 | struct sk_buff *skb = tcp_send_head(sk); |
1607 | 1611 | ||
1608 | if (skb) { | 1612 | if (!skb) |
1609 | if (tcp_write_xmit(sk, cur_mss, nonagle)) | 1613 | return; |
1610 | tcp_check_probe_timer(sk); | 1614 | |
1611 | } | 1615 | /* If we are closed, the bytes will have to remain here. |
1616 | * In time closedown will finish, we empty the write queue and | ||
1617 | * all will be happy. | ||
1618 | */ | ||
1619 | if (unlikely(sk->sk_state == TCP_CLOSE)) | ||
1620 | return; | ||
1621 | |||
1622 | if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC)) | ||
1623 | tcp_check_probe_timer(sk); | ||
1612 | } | 1624 | } |
1613 | 1625 | ||
1614 | /* Send _single_ skb sitting at the send head. This function requires | 1626 | /* Send _single_ skb sitting at the send head. This function requires |
@@ -1616,38 +1628,11 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, | |||
1616 | */ | 1628 | */ |
1617 | void tcp_push_one(struct sock *sk, unsigned int mss_now) | 1629 | void tcp_push_one(struct sock *sk, unsigned int mss_now) |
1618 | { | 1630 | { |
1619 | struct tcp_sock *tp = tcp_sk(sk); | ||
1620 | struct sk_buff *skb = tcp_send_head(sk); | 1631 | struct sk_buff *skb = tcp_send_head(sk); |
1621 | unsigned int tso_segs, cwnd_quota; | ||
1622 | 1632 | ||
1623 | BUG_ON(!skb || skb->len < mss_now); | 1633 | BUG_ON(!skb || skb->len < mss_now); |
1624 | 1634 | ||
1625 | tso_segs = tcp_init_tso_segs(sk, skb, mss_now); | 1635 | tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation); |
1626 | cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); | ||
1627 | |||
1628 | if (likely(cwnd_quota)) { | ||
1629 | unsigned int limit; | ||
1630 | |||
1631 | BUG_ON(!tso_segs); | ||
1632 | |||
1633 | limit = mss_now; | ||
1634 | if (tso_segs > 1 && !tcp_urg_mode(tp)) | ||
1635 | limit = tcp_mss_split_point(sk, skb, mss_now, | ||
1636 | cwnd_quota); | ||
1637 | |||
1638 | if (skb->len > limit && | ||
1639 | unlikely(tso_fragment(sk, skb, limit, mss_now))) | ||
1640 | return; | ||
1641 | |||
1642 | /* Send it out now. */ | ||
1643 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
1644 | |||
1645 | if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { | ||
1646 | tcp_event_new_data_sent(sk, skb); | ||
1647 | tcp_cwnd_validate(sk); | ||
1648 | return; | ||
1649 | } | ||
1650 | } | ||
1651 | } | 1636 | } |
1652 | 1637 | ||
1653 | /* This function returns the amount that we can raise the | 1638 | /* This function returns the amount that we can raise the |
@@ -1767,46 +1752,22 @@ u32 __tcp_select_window(struct sock *sk) | |||
1767 | return window; | 1752 | return window; |
1768 | } | 1753 | } |
1769 | 1754 | ||
1770 | /* Attempt to collapse two adjacent SKB's during retransmission. */ | 1755 | /* Collapses two adjacent SKB's during retransmission. */ |
1771 | static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, | 1756 | static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) |
1772 | int mss_now) | ||
1773 | { | 1757 | { |
1774 | struct tcp_sock *tp = tcp_sk(sk); | 1758 | struct tcp_sock *tp = tcp_sk(sk); |
1775 | struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); | 1759 | struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); |
1776 | int skb_size, next_skb_size; | 1760 | int skb_size, next_skb_size; |
1777 | u16 flags; | 1761 | u16 flags; |
1778 | 1762 | ||
1779 | /* The first test we must make is that neither of these two | ||
1780 | * SKB's are still referenced by someone else. | ||
1781 | */ | ||
1782 | if (skb_cloned(skb) || skb_cloned(next_skb)) | ||
1783 | return; | ||
1784 | |||
1785 | skb_size = skb->len; | 1763 | skb_size = skb->len; |
1786 | next_skb_size = next_skb->len; | 1764 | next_skb_size = next_skb->len; |
1787 | flags = TCP_SKB_CB(skb)->flags; | 1765 | flags = TCP_SKB_CB(skb)->flags; |
1788 | 1766 | ||
1789 | /* Also punt if next skb has been SACK'd. */ | ||
1790 | if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) | ||
1791 | return; | ||
1792 | |||
1793 | /* Next skb is out of window. */ | ||
1794 | if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp))) | ||
1795 | return; | ||
1796 | |||
1797 | /* Punt if not enough space exists in the first SKB for | ||
1798 | * the data in the second, or the total combined payload | ||
1799 | * would exceed the MSS. | ||
1800 | */ | ||
1801 | if ((next_skb_size > skb_tailroom(skb)) || | ||
1802 | ((skb_size + next_skb_size) > mss_now)) | ||
1803 | return; | ||
1804 | |||
1805 | BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); | 1767 | BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); |
1806 | 1768 | ||
1807 | tcp_highest_sack_combine(sk, next_skb, skb); | 1769 | tcp_highest_sack_combine(sk, next_skb, skb); |
1808 | 1770 | ||
1809 | /* Ok. We will be able to collapse the packet. */ | ||
1810 | tcp_unlink_write_queue(next_skb, sk); | 1771 | tcp_unlink_write_queue(next_skb, sk); |
1811 | 1772 | ||
1812 | skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), | 1773 | skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), |
@@ -1848,54 +1809,60 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, | |||
1848 | sk_wmem_free_skb(sk, next_skb); | 1809 | sk_wmem_free_skb(sk, next_skb); |
1849 | } | 1810 | } |
1850 | 1811 | ||
1851 | /* Do a simple retransmit without using the backoff mechanisms in | 1812 | static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) |
1852 | * tcp_timer. This is used for path mtu discovery. | 1813 | { |
1853 | * The socket is already locked here. | 1814 | if (tcp_skb_pcount(skb) > 1) |
1854 | */ | 1815 | return 0; |
1855 | void tcp_simple_retransmit(struct sock *sk) | 1816 | /* TODO: SACK collapsing could be used to remove this condition */ |
1817 | if (skb_shinfo(skb)->nr_frags != 0) | ||
1818 | return 0; | ||
1819 | if (skb_cloned(skb)) | ||
1820 | return 0; | ||
1821 | if (skb == tcp_send_head(sk)) | ||
1822 | return 0; | ||
1823 | /* Some heuristics for collapsing over SACK'd could be invented */ | ||
1824 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) | ||
1825 | return 0; | ||
1826 | |||
1827 | return 1; | ||
1828 | } | ||
1829 | |||
1830 | static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | ||
1831 | int space) | ||
1856 | { | 1832 | { |
1857 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
1858 | struct tcp_sock *tp = tcp_sk(sk); | 1833 | struct tcp_sock *tp = tcp_sk(sk); |
1859 | struct sk_buff *skb; | 1834 | struct sk_buff *skb = to, *tmp; |
1860 | unsigned int mss = tcp_current_mss(sk, 0); | 1835 | int first = 1; |
1861 | u32 prior_lost = tp->lost_out; | ||
1862 | 1836 | ||
1863 | tcp_for_write_queue(skb, sk) { | 1837 | if (!sysctl_tcp_retrans_collapse) |
1864 | if (skb == tcp_send_head(sk)) | 1838 | return; |
1839 | if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) | ||
1840 | return; | ||
1841 | |||
1842 | tcp_for_write_queue_from_safe(skb, tmp, sk) { | ||
1843 | if (!tcp_can_collapse(sk, skb)) | ||
1865 | break; | 1844 | break; |
1866 | if (skb->len > mss && | ||
1867 | !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { | ||
1868 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { | ||
1869 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | ||
1870 | tp->retrans_out -= tcp_skb_pcount(skb); | ||
1871 | } | ||
1872 | tcp_skb_mark_lost_uncond_verify(tp, skb); | ||
1873 | } | ||
1874 | } | ||
1875 | 1845 | ||
1876 | tcp_clear_retrans_hints_partial(tp); | 1846 | space -= skb->len; |
1877 | 1847 | ||
1878 | if (prior_lost == tp->lost_out) | 1848 | if (first) { |
1879 | return; | 1849 | first = 0; |
1850 | continue; | ||
1851 | } | ||
1880 | 1852 | ||
1881 | if (tcp_is_reno(tp)) | 1853 | if (space < 0) |
1882 | tcp_limit_reno_sacked(tp); | 1854 | break; |
1855 | /* Punt if not enough space exists in the first SKB for | ||
1856 | * the data in the second | ||
1857 | */ | ||
1858 | if (skb->len > skb_tailroom(to)) | ||
1859 | break; | ||
1883 | 1860 | ||
1884 | tcp_verify_left_out(tp); | 1861 | if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) |
1862 | break; | ||
1885 | 1863 | ||
1886 | /* Don't muck with the congestion window here. | 1864 | tcp_collapse_retrans(sk, to); |
1887 | * Reason is that we do not increase amount of _data_ | ||
1888 | * in network, but units changed and effective | ||
1889 | * cwnd/ssthresh really reduced now. | ||
1890 | */ | ||
1891 | if (icsk->icsk_ca_state != TCP_CA_Loss) { | ||
1892 | tp->high_seq = tp->snd_nxt; | ||
1893 | tp->snd_ssthresh = tcp_current_ssthresh(sk); | ||
1894 | tp->prior_ssthresh = 0; | ||
1895 | tp->undo_marker = 0; | ||
1896 | tcp_set_ca_state(sk, TCP_CA_Loss); | ||
1897 | } | 1865 | } |
1898 | tcp_xmit_retransmit_queue(sk); | ||
1899 | } | 1866 | } |
1900 | 1867 | ||
1901 | /* This retransmits one SKB. Policy decisions and retransmit queue | 1868 | /* This retransmits one SKB. Policy decisions and retransmit queue |
@@ -1947,17 +1914,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1947 | return -ENOMEM; /* We'll try again later. */ | 1914 | return -ENOMEM; /* We'll try again later. */ |
1948 | } | 1915 | } |
1949 | 1916 | ||
1950 | /* Collapse two adjacent packets if worthwhile and we can. */ | 1917 | tcp_retrans_try_collapse(sk, skb, cur_mss); |
1951 | if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && | ||
1952 | (skb->len < (cur_mss >> 1)) && | ||
1953 | (!tcp_skb_is_last(sk, skb)) && | ||
1954 | (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && | ||
1955 | (skb_shinfo(skb)->nr_frags == 0 && | ||
1956 | skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && | ||
1957 | (tcp_skb_pcount(skb) == 1 && | ||
1958 | tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && | ||
1959 | (sysctl_tcp_retrans_collapse != 0)) | ||
1960 | tcp_retrans_try_collapse(sk, skb, cur_mss); | ||
1961 | 1918 | ||
1962 | /* Some Solaris stacks overoptimize and ignore the FIN on a | 1919 | /* Some Solaris stacks overoptimize and ignore the FIN on a |
1963 | * retransmit when old data is attached. So strip it off | 1920 | * retransmit when old data is attached. So strip it off |
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 7ddc30f0744f..25524d4e372a 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -153,12 +153,11 @@ static int tcpprobe_sprint(char *tbuf, int n) | |||
153 | = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); | 153 | = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); |
154 | 154 | ||
155 | return snprintf(tbuf, n, | 155 | return snprintf(tbuf, n, |
156 | "%lu.%09lu " NIPQUAD_FMT ":%u " NIPQUAD_FMT ":%u" | 156 | "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n", |
157 | " %d %#x %#x %u %u %u %u\n", | ||
158 | (unsigned long) tv.tv_sec, | 157 | (unsigned long) tv.tv_sec, |
159 | (unsigned long) tv.tv_nsec, | 158 | (unsigned long) tv.tv_nsec, |
160 | NIPQUAD(p->saddr), ntohs(p->sport), | 159 | &p->saddr, ntohs(p->sport), |
161 | NIPQUAD(p->daddr), ntohs(p->dport), | 160 | &p->daddr, ntohs(p->dport), |
162 | p->length, p->snd_nxt, p->snd_una, | 161 | p->length, p->snd_nxt, p->snd_una, |
163 | p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt); | 162 | p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt); |
164 | } | 163 | } |
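
This and the following hunks retire the NIPQUAD()/NIPQUAD_FMT macro pair in favor of the %pI4 printk extension (and %pI6 for IPv6), which formats an address through a pointer to its big-endian value. Minimal usage, assuming a kernel with these extensions:

	__be32 addr = htonl(0xC0A80001);		/* 192.168.0.1 */

	printk(KERN_DEBUG "peer %pI4\n", &addr);	/* pass a pointer, not the value */
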
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6b6dff1164b9..0170e914f1b0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -65,7 +65,7 @@ static void tcp_write_err(struct sock *sk) | |||
65 | static int tcp_out_of_resources(struct sock *sk, int do_reset) | 65 | static int tcp_out_of_resources(struct sock *sk, int do_reset) |
66 | { | 66 | { |
67 | struct tcp_sock *tp = tcp_sk(sk); | 67 | struct tcp_sock *tp = tcp_sk(sk); |
68 | int orphans = atomic_read(&tcp_orphan_count); | 68 | int orphans = percpu_counter_read_positive(&tcp_orphan_count); |
69 | 69 | ||
70 | /* If peer does not open window for long time, or did not transmit | 70 | /* If peer does not open window for long time, or did not transmit |
71 | * anything for long time, penalize it. */ | 71 | * anything for long time, penalize it. */ |
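
tcp_orphan_count is converted from a global atomic_t to a percpu_counter elsewhere in this series: increments stay CPU-local and are folded into the shared total only in batches, so a read is approximate and can transiently dip below zero. percpu_counter_read_positive() clamps that at 0, which errs on the safe side for an out-of-resources test. Sketch of the pattern:

	static int over_orphan_limit(struct percpu_counter *orphans, int limit)
	{
		percpu_counter_inc(orphans);	/* cheap: touches a per-cpu slot */

		/* the folded total may lag the per-cpu deltas; clamp at zero */
		return percpu_counter_read_positive(orphans) > limit;
	}
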
@@ -171,7 +171,7 @@ static int tcp_write_timeout(struct sock *sk) | |||
171 | 171 | ||
172 | static void tcp_delack_timer(unsigned long data) | 172 | static void tcp_delack_timer(unsigned long data) |
173 | { | 173 | { |
174 | struct sock *sk = (struct sock*)data; | 174 | struct sock *sk = (struct sock *)data; |
175 | struct tcp_sock *tp = tcp_sk(sk); | 175 | struct tcp_sock *tp = tcp_sk(sk); |
176 | struct inet_connection_sock *icsk = inet_csk(sk); | 176 | struct inet_connection_sock *icsk = inet_csk(sk); |
177 | 177 | ||
@@ -299,15 +299,15 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
299 | #ifdef TCP_DEBUG | 299 | #ifdef TCP_DEBUG |
300 | struct inet_sock *inet = inet_sk(sk); | 300 | struct inet_sock *inet = inet_sk(sk); |
301 | if (sk->sk_family == AF_INET) { | 301 | if (sk->sk_family == AF_INET) { |
302 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIPQUAD_FMT ":%u/%u shrinks window %u:%u. Repaired.\n", | 302 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", |
303 | NIPQUAD(inet->daddr), ntohs(inet->dport), | 303 | &inet->daddr, ntohs(inet->dport), |
304 | inet->num, tp->snd_una, tp->snd_nxt); | 304 | inet->num, tp->snd_una, tp->snd_nxt); |
305 | } | 305 | } |
306 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 306 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
307 | else if (sk->sk_family == AF_INET6) { | 307 | else if (sk->sk_family == AF_INET6) { |
308 | struct ipv6_pinfo *np = inet6_sk(sk); | 308 | struct ipv6_pinfo *np = inet6_sk(sk); |
309 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIP6_FMT ":%u/%u shrinks window %u:%u. Repaired.\n", | 309 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", |
310 | NIP6(np->daddr), ntohs(inet->dport), | 310 | &np->daddr, ntohs(inet->dport), |
311 | inet->num, tp->snd_una, tp->snd_nxt); | 311 | inet->num, tp->snd_una, tp->snd_nxt); |
312 | } | 312 | } |
313 | #endif | 313 | #endif |
@@ -396,7 +396,7 @@ out:; | |||
396 | 396 | ||
397 | static void tcp_write_timer(unsigned long data) | 397 | static void tcp_write_timer(unsigned long data) |
398 | { | 398 | { |
399 | struct sock *sk = (struct sock*)data; | 399 | struct sock *sk = (struct sock *)data; |
400 | struct inet_connection_sock *icsk = inet_csk(sk); | 400 | struct inet_connection_sock *icsk = inet_csk(sk); |
401 | int event; | 401 | int event; |
402 | 402 | ||
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index e03b10183a8b..9ec843a9bbb2 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c | |||
@@ -83,7 +83,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
83 | else if (!yeah->doing_reno_now) { | 83 | else if (!yeah->doing_reno_now) { |
84 | /* Scalable */ | 84 | /* Scalable */ |
85 | 85 | ||
86 | tp->snd_cwnd_cnt+=yeah->pkts_acked; | 86 | tp->snd_cwnd_cnt += yeah->pkts_acked; |
87 | if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ | 87 | if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ |
88 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | 88 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) |
89 | tp->snd_cwnd++; | 89 | tp->snd_cwnd++; |
@@ -224,7 +224,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { | |||
224 | 224 | ||
225 | reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); | 225 | reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); |
226 | } else | 226 | } else |
227 | reduction = max(tp->snd_cwnd>>1,2U); | 227 | reduction = max(tp->snd_cwnd>>1, 2U); |
228 | 228 | ||
229 | yeah->fast_count = 0; | 229 | yeah->fast_count = 0; |
230 | yeah->reno_count = max(yeah->reno_count>>1, 2U); | 230 | yeah->reno_count = max(yeah->reno_count>>1, 2U); |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 98c1fd09be88..cf5ab0581eba 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -81,6 +81,8 @@ | |||
81 | #include <asm/uaccess.h> | 81 | #include <asm/uaccess.h> |
82 | #include <asm/ioctls.h> | 82 | #include <asm/ioctls.h> |
83 | #include <linux/bootmem.h> | 83 | #include <linux/bootmem.h> |
84 | #include <linux/highmem.h> | ||
85 | #include <linux/swap.h> | ||
84 | #include <linux/types.h> | 86 | #include <linux/types.h> |
85 | #include <linux/fcntl.h> | 87 | #include <linux/fcntl.h> |
86 | #include <linux/module.h> | 88 | #include <linux/module.h> |
@@ -104,12 +106,8 @@ | |||
104 | #include <net/xfrm.h> | 106 | #include <net/xfrm.h> |
105 | #include "udp_impl.h" | 107 | #include "udp_impl.h" |
106 | 108 | ||
107 | /* | 109 | struct udp_table udp_table; |
108 | * Snmp MIB for the UDP layer | 110 | EXPORT_SYMBOL(udp_table); |
109 | */ | ||
110 | |||
111 | struct hlist_head udp_hash[UDP_HTABLE_SIZE]; | ||
112 | DEFINE_RWLOCK(udp_hash_lock); | ||
113 | 111 | ||
114 | int sysctl_udp_mem[3] __read_mostly; | 112 | int sysctl_udp_mem[3] __read_mostly; |
115 | int sysctl_udp_rmem_min __read_mostly; | 113 | int sysctl_udp_rmem_min __read_mostly; |
@@ -123,15 +121,15 @@ atomic_t udp_memory_allocated; | |||
123 | EXPORT_SYMBOL(udp_memory_allocated); | 121 | EXPORT_SYMBOL(udp_memory_allocated); |
124 | 122 | ||
125 | static int udp_lib_lport_inuse(struct net *net, __u16 num, | 123 | static int udp_lib_lport_inuse(struct net *net, __u16 num, |
126 | const struct hlist_head udptable[], | 124 | const struct udp_hslot *hslot, |
127 | struct sock *sk, | 125 | struct sock *sk, |
128 | int (*saddr_comp)(const struct sock *sk1, | 126 | int (*saddr_comp)(const struct sock *sk1, |
129 | const struct sock *sk2)) | 127 | const struct sock *sk2)) |
130 | { | 128 | { |
131 | struct sock *sk2; | 129 | struct sock *sk2; |
132 | struct hlist_node *node; | 130 | struct hlist_nulls_node *node; |
133 | 131 | ||
134 | sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)]) | 132 | sk_nulls_for_each(sk2, node, &hslot->head) |
135 | if (net_eq(sock_net(sk2), net) && | 133 | if (net_eq(sock_net(sk2), net) && |
136 | sk2 != sk && | 134 | sk2 != sk && |
137 | sk2->sk_hash == num && | 135 | sk2->sk_hash == num && |
@@ -154,12 +152,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
154 | int (*saddr_comp)(const struct sock *sk1, | 152 | int (*saddr_comp)(const struct sock *sk1, |
155 | const struct sock *sk2 ) ) | 153 | const struct sock *sk2 ) ) |
156 | { | 154 | { |
157 | struct hlist_head *udptable = sk->sk_prot->h.udp_hash; | 155 | struct udp_hslot *hslot; |
156 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | ||
158 | int error = 1; | 157 | int error = 1; |
159 | struct net *net = sock_net(sk); | 158 | struct net *net = sock_net(sk); |
160 | 159 | ||
161 | write_lock_bh(&udp_hash_lock); | ||
162 | |||
163 | if (!snum) { | 160 | if (!snum) { |
164 | int low, high, remaining; | 161 | int low, high, remaining; |
165 | unsigned rand; | 162 | unsigned rand; |
@@ -171,26 +168,34 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
171 | rand = net_random(); | 168 | rand = net_random(); |
172 | snum = first = rand % remaining + low; | 169 | snum = first = rand % remaining + low; |
173 | rand |= 1; | 170 | rand |= 1; |
174 | while (udp_lib_lport_inuse(net, snum, udptable, sk, | 171 | for (;;) { |
175 | saddr_comp)) { | 172 | hslot = &udptable->hash[udp_hashfn(net, snum)]; |
173 | spin_lock_bh(&hslot->lock); | ||
174 | if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) | ||
175 | break; | ||
176 | spin_unlock_bh(&hslot->lock); | ||
176 | do { | 177 | do { |
177 | snum = snum + rand; | 178 | snum = snum + rand; |
178 | } while (snum < low || snum > high); | 179 | } while (snum < low || snum > high); |
179 | if (snum == first) | 180 | if (snum == first) |
180 | goto fail; | 181 | goto fail; |
181 | } | 182 | } |
182 | } else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp)) | 183 | } else { |
183 | goto fail; | 184 | hslot = &udptable->hash[udp_hashfn(net, snum)]; |
184 | 185 | spin_lock_bh(&hslot->lock); | |
186 | if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) | ||
187 | goto fail_unlock; | ||
188 | } | ||
185 | inet_sk(sk)->num = snum; | 189 | inet_sk(sk)->num = snum; |
186 | sk->sk_hash = snum; | 190 | sk->sk_hash = snum; |
187 | if (sk_unhashed(sk)) { | 191 | if (sk_unhashed(sk)) { |
188 | sk_add_node(sk, &udptable[udp_hashfn(net, snum)]); | 192 | sk_nulls_add_node_rcu(sk, &hslot->head); |
189 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 193 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
190 | } | 194 | } |
191 | error = 0; | 195 | error = 0; |
196 | fail_unlock: | ||
197 | spin_unlock_bh(&hslot->lock); | ||
192 | fail: | 198 | fail: |
193 | write_unlock_bh(&udp_hash_lock); | ||
194 | return error; | 199 | return error; |
195 | } | 200 | } |
196 | 201 | ||
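
The rewritten port search holds only the spinlock of the slot currently being probed, instead of the old global write lock over the whole table, and walks the port range with an odd random stride so successive probes cycle without repeating until they return to the starting port. A sketch, with a hypothetical in_use() standing in for udp_lib_lport_inuse() and the port-range lookup elided:

	static int pick_udp_port(struct net *net, struct udp_table *udptable,
				 struct sock *sk, int low, int high)
	{
		int remaining = (high - low) + 1;
		unsigned int rand = net_random() | 1;	/* odd stride => full cycle */
		unsigned short snum = low + net_random() % remaining;
		unsigned short first = snum;
		struct udp_hslot *hslot;

		for (;;) {
			hslot = &udptable->hash[udp_hashfn(net, snum)];
			spin_lock_bh(&hslot->lock);
			if (!in_use(net, snum, hslot, sk))
				return snum;	/* slot lock stays held for insert */
			spin_unlock_bh(&hslot->lock);
			do {
				snum += rand;	/* wraps mod 2^16 */
			} while (snum < low || snum > high);
			if (snum == first)
				return -1;	/* full cycle: nothing free */
		}
	}
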
@@ -208,63 +213,91 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) | |||
208 | return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); | 213 | return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); |
209 | } | 214 | } |
210 | 215 | ||
216 | static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, | ||
217 | unsigned short hnum, | ||
218 | __be16 sport, __be32 daddr, __be16 dport, int dif) | ||
219 | { | ||
220 | int score = -1; | ||
221 | |||
222 | if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && | ||
223 | !ipv6_only_sock(sk)) { | ||
224 | struct inet_sock *inet = inet_sk(sk); | ||
225 | |||
226 | score = (sk->sk_family == PF_INET ? 1 : 0); | ||
227 | if (inet->rcv_saddr) { | ||
228 | if (inet->rcv_saddr != daddr) | ||
229 | return -1; | ||
230 | score += 2; | ||
231 | } | ||
232 | if (inet->daddr) { | ||
233 | if (inet->daddr != saddr) | ||
234 | return -1; | ||
235 | score += 2; | ||
236 | } | ||
237 | if (inet->dport) { | ||
238 | if (inet->dport != sport) | ||
239 | return -1; | ||
240 | score += 2; | ||
241 | } | ||
242 | if (sk->sk_bound_dev_if) { | ||
243 | if (sk->sk_bound_dev_if != dif) | ||
244 | return -1; | ||
245 | score += 2; | ||
246 | } | ||
247 | } | ||
248 | return score; | ||
249 | } | ||
250 | |||
211 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | 251 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try |
212 | * harder than this. -DaveM | 252 | * harder than this. -DaveM |
213 | */ | 253 | */ |
214 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | 254 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, |
215 | __be16 sport, __be32 daddr, __be16 dport, | 255 | __be16 sport, __be32 daddr, __be16 dport, |
216 | int dif, struct hlist_head udptable[]) | 256 | int dif, struct udp_table *udptable) |
217 | { | 257 | { |
218 | struct sock *sk, *result = NULL; | 258 | struct sock *sk, *result; |
219 | struct hlist_node *node; | 259 | struct hlist_nulls_node *node; |
220 | unsigned short hnum = ntohs(dport); | 260 | unsigned short hnum = ntohs(dport); |
221 | int badness = -1; | 261 | unsigned int hash = udp_hashfn(net, hnum); |
222 | 262 | struct udp_hslot *hslot = &udptable->hash[hash]; | |
223 | read_lock(&udp_hash_lock); | 263 | int score, badness; |
224 | sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { | 264 | |
225 | struct inet_sock *inet = inet_sk(sk); | 265 | rcu_read_lock(); |
226 | 266 | begin: | |
227 | if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && | 267 | result = NULL; |
228 | !ipv6_only_sock(sk)) { | 268 | badness = -1; |
229 | int score = (sk->sk_family == PF_INET ? 1 : 0); | 269 | sk_nulls_for_each_rcu(sk, node, &hslot->head) { |
230 | if (inet->rcv_saddr) { | 270 | score = compute_score(sk, net, saddr, hnum, sport, |
231 | if (inet->rcv_saddr != daddr) | 271 | daddr, dport, dif); |
232 | continue; | 272 | if (score > badness) { |
233 | score+=2; | 273 | result = sk; |
234 | } | 274 | badness = score; |
235 | if (inet->daddr) { | ||
236 | if (inet->daddr != saddr) | ||
237 | continue; | ||
238 | score+=2; | ||
239 | } | ||
240 | if (inet->dport) { | ||
241 | if (inet->dport != sport) | ||
242 | continue; | ||
243 | score+=2; | ||
244 | } | ||
245 | if (sk->sk_bound_dev_if) { | ||
246 | if (sk->sk_bound_dev_if != dif) | ||
247 | continue; | ||
248 | score+=2; | ||
249 | } | ||
250 | if (score == 9) { | ||
251 | result = sk; | ||
252 | break; | ||
253 | } else if (score > badness) { | ||
254 | result = sk; | ||
255 | badness = score; | ||
256 | } | ||
257 | } | 275 | } |
258 | } | 276 | } |
259 | if (result) | 277 | /* |
260 | sock_hold(result); | 278 | * if the nulls value we got at the end of this lookup is |
261 | read_unlock(&udp_hash_lock); | 279 | * not the expected one, we must restart lookup. |
280 | * We probably met an item that was moved to another chain. | ||
281 | */ | ||
282 | if (get_nulls_value(node) != hash) | ||
283 | goto begin; | ||
284 | |||
285 | if (result) { | ||
286 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) | ||
287 | result = NULL; | ||
288 | else if (unlikely(compute_score(result, net, saddr, hnum, sport, | ||
289 | daddr, dport, dif) < badness)) { | ||
290 | sock_put(result); | ||
291 | goto begin; | ||
292 | } | ||
293 | } | ||
294 | rcu_read_unlock(); | ||
262 | return result; | 295 | return result; |
263 | } | 296 | } |
264 | 297 | ||
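
The begin/retry dance is what makes this RCU walk safe in combination with SLAB_DESTROY_BY_RCU: every chain ends in a nulls marker encoding its own slot number, so if a socket is freed and recycled into a different chain mid-walk, the reader finishes on a foreign terminator, notices the mismatch via get_nulls_value(), and restarts. Generic form of the loop (keys_match() is a hypothetical predicate; the caller holds rcu_read_lock() and must still reference-and-recheck any hit, as above):

	static struct sock *nulls_lookup(struct udp_hslot *hslot, unsigned int hash)
	{
		struct hlist_nulls_node *node;
		struct sock *sk;

	begin:
		sk_nulls_for_each_rcu(sk, node, &hslot->head)
			if (keys_match(sk))
				return sk;
		/* Ended on another slot's terminator: an entry was recycled
		 * into a different chain under us.  Restart from the head. */
		if (get_nulls_value(node) != hash)
			goto begin;
		return NULL;
	}
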
265 | static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, | 298 | static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, |
266 | __be16 sport, __be16 dport, | 299 | __be16 sport, __be16 dport, |
267 | struct hlist_head udptable[]) | 300 | struct udp_table *udptable) |
268 | { | 301 | { |
269 | struct sock *sk; | 302 | struct sock *sk; |
270 | const struct iphdr *iph = ip_hdr(skb); | 303 | const struct iphdr *iph = ip_hdr(skb); |
@@ -280,7 +313,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, | |||
280 | struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, | 313 | struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, |
281 | __be32 daddr, __be16 dport, int dif) | 314 | __be32 daddr, __be16 dport, int dif) |
282 | { | 315 | { |
283 | return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); | 316 | return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); |
284 | } | 317 | } |
285 | EXPORT_SYMBOL_GPL(udp4_lib_lookup); | 318 | EXPORT_SYMBOL_GPL(udp4_lib_lookup); |
286 | 319 | ||
@@ -289,11 +322,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, | |||
289 | __be16 rmt_port, __be32 rmt_addr, | 322 | __be16 rmt_port, __be32 rmt_addr, |
290 | int dif) | 323 | int dif) |
291 | { | 324 | { |
292 | struct hlist_node *node; | 325 | struct hlist_nulls_node *node; |
293 | struct sock *s = sk; | 326 | struct sock *s = sk; |
294 | unsigned short hnum = ntohs(loc_port); | 327 | unsigned short hnum = ntohs(loc_port); |
295 | 328 | ||
296 | sk_for_each_from(s, node) { | 329 | sk_nulls_for_each_from(s, node) { |
297 | struct inet_sock *inet = inet_sk(s); | 330 | struct inet_sock *inet = inet_sk(s); |
298 | 331 | ||
299 | if (!net_eq(sock_net(s), net) || | 332 | if (!net_eq(sock_net(s), net) || |
@@ -324,7 +357,7 @@ found: | |||
324 | * to find the appropriate port. | 357 | * to find the appropriate port. |
325 | */ | 358 | */ |
326 | 359 | ||
327 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) | 360 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) |
328 | { | 361 | { |
329 | struct inet_sock *inet; | 362 | struct inet_sock *inet; |
330 | struct iphdr *iph = (struct iphdr*)skb->data; | 363 | struct iphdr *iph = (struct iphdr*)skb->data; |
@@ -393,7 +426,7 @@ out: | |||
393 | 426 | ||
394 | void udp_err(struct sk_buff *skb, u32 info) | 427 | void udp_err(struct sk_buff *skb, u32 info) |
395 | { | 428 | { |
396 | __udp4_lib_err(skb, info, udp_hash); | 429 | __udp4_lib_err(skb, info, &udp_table); |
397 | } | 430 | } |
398 | 431 | ||
399 | /* | 432 | /* |
@@ -686,7 +719,7 @@ do_append_data: | |||
686 | up->len += ulen; | 719 | up->len += ulen; |
687 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | 720 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; |
688 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, | 721 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, |
689 | sizeof(struct udphdr), &ipc, rt, | 722 | sizeof(struct udphdr), &ipc, &rt, |
690 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | 723 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); |
691 | if (err) | 724 | if (err) |
692 | udp_flush_pending_frames(sk); | 725 | udp_flush_pending_frames(sk); |
@@ -935,6 +968,23 @@ int udp_disconnect(struct sock *sk, int flags) | |||
935 | return 0; | 968 | return 0; |
936 | } | 969 | } |
937 | 970 | ||
971 | void udp_lib_unhash(struct sock *sk) | ||
972 | { | ||
973 | if (sk_hashed(sk)) { | ||
974 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | ||
975 | unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); | ||
976 | struct udp_hslot *hslot = &udptable->hash[hash]; | ||
977 | |||
978 | spin_lock_bh(&hslot->lock); | ||
979 | if (sk_nulls_del_node_init_rcu(sk)) { | ||
980 | inet_sk(sk)->num = 0; | ||
981 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | ||
982 | } | ||
983 | spin_unlock_bh(&hslot->lock); | ||
984 | } | ||
985 | } | ||
986 | EXPORT_SYMBOL(udp_lib_unhash); | ||
987 | |||
938 | static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | 988 | static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) |
939 | { | 989 | { |
940 | int is_udplite = IS_UDPLITE(sk); | 990 | int is_udplite = IS_UDPLITE(sk); |
@@ -1073,13 +1123,14 @@ drop: | |||
1073 | static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | 1123 | static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, |
1074 | struct udphdr *uh, | 1124 | struct udphdr *uh, |
1075 | __be32 saddr, __be32 daddr, | 1125 | __be32 saddr, __be32 daddr, |
1076 | struct hlist_head udptable[]) | 1126 | struct udp_table *udptable) |
1077 | { | 1127 | { |
1078 | struct sock *sk; | 1128 | struct sock *sk; |
1129 | struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; | ||
1079 | int dif; | 1130 | int dif; |
1080 | 1131 | ||
1081 | read_lock(&udp_hash_lock); | 1132 | spin_lock(&hslot->lock); |
1082 | sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); | 1133 | sk = sk_nulls_head(&hslot->head); |
1083 | dif = skb->dev->ifindex; | 1134 | dif = skb->dev->ifindex; |
1084 | sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); | 1135 | sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); |
1085 | if (sk) { | 1136 | if (sk) { |
@@ -1088,7 +1139,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
1088 | do { | 1139 | do { |
1089 | struct sk_buff *skb1 = skb; | 1140 | struct sk_buff *skb1 = skb; |
1090 | 1141 | ||
1091 | sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest, | 1142 | sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, |
1092 | daddr, uh->source, saddr, | 1143 | daddr, uh->source, saddr, |
1093 | dif); | 1144 | dif); |
1094 | if (sknext) | 1145 | if (sknext) |
@@ -1105,7 +1156,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
1105 | } while (sknext); | 1156 | } while (sknext); |
1106 | } else | 1157 | } else |
1107 | kfree_skb(skb); | 1158 | kfree_skb(skb); |
1108 | read_unlock(&udp_hash_lock); | 1159 | spin_unlock(&hslot->lock); |
1109 | return 0; | 1160 | return 0; |
1110 | } | 1161 | } |
1111 | 1162 | ||
@@ -1151,7 +1202,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, | |||
1151 | * All we need to do is get the socket, and then do a checksum. | 1202 | * All we need to do is get the socket, and then do a checksum. |
1152 | */ | 1203 | */ |
1153 | 1204 | ||
1154 | int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | 1205 | int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, |
1155 | int proto) | 1206 | int proto) |
1156 | { | 1207 | { |
1157 | struct sock *sk; | 1208 | struct sock *sk; |
@@ -1219,13 +1270,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | |||
1219 | return 0; | 1270 | return 0; |
1220 | 1271 | ||
1221 | short_packet: | 1272 | short_packet: |
1222 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From " NIPQUAD_FMT ":%u %d/%d to " NIPQUAD_FMT ":%u\n", | 1273 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", |
1223 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | 1274 | proto == IPPROTO_UDPLITE ? "-Lite" : "", |
1224 | NIPQUAD(saddr), | 1275 | &saddr, |
1225 | ntohs(uh->source), | 1276 | ntohs(uh->source), |
1226 | ulen, | 1277 | ulen, |
1227 | skb->len, | 1278 | skb->len, |
1228 | NIPQUAD(daddr), | 1279 | &daddr, |
1229 | ntohs(uh->dest)); | 1280 | ntohs(uh->dest)); |
1230 | goto drop; | 1281 | goto drop; |
1231 | 1282 | ||
@@ -1234,11 +1285,11 @@ csum_error: | |||
1234 | * RFC1122: OK. Discards the bad packet silently (as far as | 1285 | * RFC1122: OK. Discards the bad packet silently (as far as |
1235 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | 1286 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). |
1236 | */ | 1287 | */ |
1237 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From " NIPQUAD_FMT ":%u to " NIPQUAD_FMT ":%u ulen %d\n", | 1288 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", |
1238 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | 1289 | proto == IPPROTO_UDPLITE ? "-Lite" : "", |
1239 | NIPQUAD(saddr), | 1290 | &saddr, |
1240 | ntohs(uh->source), | 1291 | ntohs(uh->source), |
1241 | NIPQUAD(daddr), | 1292 | &daddr, |
1242 | ntohs(uh->dest), | 1293 | ntohs(uh->dest), |
1243 | ulen); | 1294 | ulen); |
1244 | drop: | 1295 | drop: |
@@ -1249,7 +1300,7 @@ drop: | |||
1249 | 1300 | ||
1250 | int udp_rcv(struct sk_buff *skb) | 1301 | int udp_rcv(struct sk_buff *skb) |
1251 | { | 1302 | { |
1252 | return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); | 1303 | return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); |
1253 | } | 1304 | } |
1254 | 1305 | ||
1255 | void udp_destroy_sock(struct sock *sk) | 1306 | void udp_destroy_sock(struct sock *sk) |
@@ -1491,7 +1542,8 @@ struct proto udp_prot = { | |||
1491 | .sysctl_wmem = &sysctl_udp_wmem_min, | 1542 | .sysctl_wmem = &sysctl_udp_wmem_min, |
1492 | .sysctl_rmem = &sysctl_udp_rmem_min, | 1543 | .sysctl_rmem = &sysctl_udp_rmem_min, |
1493 | .obj_size = sizeof(struct udp_sock), | 1544 | .obj_size = sizeof(struct udp_sock), |
1494 | .h.udp_hash = udp_hash, | 1545 | .slab_flags = SLAB_DESTROY_BY_RCU, |
1546 | .h.udp_table = &udp_table, | ||
1495 | #ifdef CONFIG_COMPAT | 1547 | #ifdef CONFIG_COMPAT |
1496 | .compat_setsockopt = compat_udp_setsockopt, | 1548 | .compat_setsockopt = compat_udp_setsockopt, |
1497 | .compat_getsockopt = compat_udp_getsockopt, | 1549 | .compat_getsockopt = compat_udp_getsockopt, |
@@ -1501,20 +1553,23 @@ struct proto udp_prot = { | |||
1501 | /* ------------------------------------------------------------------------ */ | 1553 | /* ------------------------------------------------------------------------ */ |
1502 | #ifdef CONFIG_PROC_FS | 1554 | #ifdef CONFIG_PROC_FS |
1503 | 1555 | ||
1504 | static struct sock *udp_get_first(struct seq_file *seq) | 1556 | static struct sock *udp_get_first(struct seq_file *seq, int start) |
1505 | { | 1557 | { |
1506 | struct sock *sk; | 1558 | struct sock *sk; |
1507 | struct udp_iter_state *state = seq->private; | 1559 | struct udp_iter_state *state = seq->private; |
1508 | struct net *net = seq_file_net(seq); | 1560 | struct net *net = seq_file_net(seq); |
1509 | 1561 | ||
1510 | for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { | 1562 | for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { |
1511 | struct hlist_node *node; | 1563 | struct hlist_nulls_node *node; |
1512 | sk_for_each(sk, node, state->hashtable + state->bucket) { | 1564 | struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; |
1565 | spin_lock_bh(&hslot->lock); | ||
1566 | sk_nulls_for_each(sk, node, &hslot->head) { | ||
1513 | if (!net_eq(sock_net(sk), net)) | 1567 | if (!net_eq(sock_net(sk), net)) |
1514 | continue; | 1568 | continue; |
1515 | if (sk->sk_family == state->family) | 1569 | if (sk->sk_family == state->family) |
1516 | goto found; | 1570 | goto found; |
1517 | } | 1571 | } |
1572 | spin_unlock_bh(&hslot->lock); | ||
1518 | } | 1573 | } |
1519 | sk = NULL; | 1574 | sk = NULL; |
1520 | found: | 1575 | found: |
@@ -1527,21 +1582,19 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) | |||
1527 | struct net *net = seq_file_net(seq); | 1582 | struct net *net = seq_file_net(seq); |
1528 | 1583 | ||
1529 | do { | 1584 | do { |
1530 | sk = sk_next(sk); | 1585 | sk = sk_nulls_next(sk); |
1531 | try_again: | ||
1532 | ; | ||
1533 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); | 1586 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); |
1534 | 1587 | ||
1535 | if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { | 1588 | if (!sk) { |
1536 | sk = sk_head(state->hashtable + state->bucket); | 1589 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); |
1537 | goto try_again; | 1590 | return udp_get_first(seq, state->bucket + 1); |
1538 | } | 1591 | } |
1539 | return sk; | 1592 | return sk; |
1540 | } | 1593 | } |
1541 | 1594 | ||
1542 | static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) | 1595 | static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) |
1543 | { | 1596 | { |
1544 | struct sock *sk = udp_get_first(seq); | 1597 | struct sock *sk = udp_get_first(seq, 0); |
1545 | 1598 | ||
1546 | if (sk) | 1599 | if (sk) |
1547 | while (pos && (sk = udp_get_next(seq, sk)) != NULL) | 1600 | while (pos && (sk = udp_get_next(seq, sk)) != NULL) |
@@ -1550,9 +1603,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) | |||
1550 | } | 1603 | } |
1551 | 1604 | ||
1552 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) | 1605 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) |
1553 | __acquires(udp_hash_lock) | ||
1554 | { | 1606 | { |
1555 | read_lock(&udp_hash_lock); | ||
1556 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; | 1607 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; |
1557 | } | 1608 | } |
1558 | 1609 | ||
@@ -1570,9 +1621,11 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1570 | } | 1621 | } |
1571 | 1622 | ||
1572 | static void udp_seq_stop(struct seq_file *seq, void *v) | 1623 | static void udp_seq_stop(struct seq_file *seq, void *v) |
1573 | __releases(udp_hash_lock) | ||
1574 | { | 1624 | { |
1575 | read_unlock(&udp_hash_lock); | 1625 | struct udp_iter_state *state = seq->private; |
1626 | |||
1627 | if (state->bucket < UDP_HTABLE_SIZE) | ||
1628 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); | ||
1576 | } | 1629 | } |
1577 | 1630 | ||
1578 | static int udp_seq_open(struct inode *inode, struct file *file) | 1631 | static int udp_seq_open(struct inode *inode, struct file *file) |
@@ -1588,7 +1641,7 @@ static int udp_seq_open(struct inode *inode, struct file *file) | |||
1588 | 1641 | ||
1589 | s = ((struct seq_file *)file->private_data)->private; | 1642 | s = ((struct seq_file *)file->private_data)->private; |
1590 | s->family = afinfo->family; | 1643 | s->family = afinfo->family; |
1591 | s->hashtable = afinfo->hashtable; | 1644 | s->udp_table = afinfo->udp_table; |
1592 | return err; | 1645 | return err; |
1593 | } | 1646 | } |
1594 | 1647 | ||
@@ -1660,7 +1713,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) | |||
1660 | static struct udp_seq_afinfo udp4_seq_afinfo = { | 1713 | static struct udp_seq_afinfo udp4_seq_afinfo = { |
1661 | .name = "udp", | 1714 | .name = "udp", |
1662 | .family = AF_INET, | 1715 | .family = AF_INET, |
1663 | .hashtable = udp_hash, | 1716 | .udp_table = &udp_table, |
1664 | .seq_fops = { | 1717 | .seq_fops = { |
1665 | .owner = THIS_MODULE, | 1718 | .owner = THIS_MODULE, |
1666 | }, | 1719 | }, |
@@ -1695,16 +1748,28 @@ void udp4_proc_exit(void) | |||
1695 | } | 1748 | } |
1696 | #endif /* CONFIG_PROC_FS */ | 1749 | #endif /* CONFIG_PROC_FS */ |
1697 | 1750 | ||
1751 | void __init udp_table_init(struct udp_table *table) | ||
1752 | { | ||
1753 | int i; | ||
1754 | |||
1755 | for (i = 0; i < UDP_HTABLE_SIZE; i++) { | ||
1756 | INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); | ||
1757 | spin_lock_init(&table->hash[i].lock); | ||
1758 | } | ||
1759 | } | ||
1760 | |||
1698 | void __init udp_init(void) | 1761 | void __init udp_init(void) |
1699 | { | 1762 | { |
1700 | unsigned long limit; | 1763 | unsigned long nr_pages, limit; |
1701 | 1764 | ||
1765 | udp_table_init(&udp_table); | ||
1702 | /* Set the pressure threshold up by the same strategy of TCP. It is a | 1766 | /* Set the pressure threshold up by the same strategy of TCP. It is a |
1703 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing | 1767 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing |
1704 | * toward zero with the amount of memory, with a floor of 128 pages. | 1768 | * toward zero with the amount of memory, with a floor of 128 pages. |
1705 | */ | 1769 | */ |
1706 | limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); | 1770 | nr_pages = totalram_pages - totalhigh_pages; |
1707 | limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); | 1771 | limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); |
1772 | limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); | ||
1708 | limit = max(limit, 128UL); | 1773 | limit = max(limit, 128UL); |
1709 | sysctl_udp_mem[0] = limit / 4 * 3; | 1774 | sysctl_udp_mem[0] = limit / 4 * 3; |
1710 | sysctl_udp_mem[1] = limit; | 1775 | sysctl_udp_mem[1] = limit; |
@@ -1715,8 +1780,6 @@ void __init udp_init(void) | |||
1715 | } | 1780 | } |
1716 | 1781 | ||
1717 | EXPORT_SYMBOL(udp_disconnect); | 1782 | EXPORT_SYMBOL(udp_disconnect); |
1718 | EXPORT_SYMBOL(udp_hash); | ||
1719 | EXPORT_SYMBOL(udp_hash_lock); | ||
1720 | EXPORT_SYMBOL(udp_ioctl); | 1783 | EXPORT_SYMBOL(udp_ioctl); |
1721 | EXPORT_SYMBOL(udp_prot); | 1784 | EXPORT_SYMBOL(udp_prot); |
1722 | EXPORT_SYMBOL(udp_sendmsg); | 1785 | EXPORT_SYMBOL(udp_sendmsg); |
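
Note on the udp.c hunks above: the global udp_hash[]/udp_hash_lock pair is replaced by a struct udp_table whose buckets each carry their own spinlock and an hlist_nulls chain (see udp_table_init() above), so readers can walk a bucket under rcu_read_lock() alone. Below is a minimal sketch of the lookup pattern this enables, modeled on __udp4_lib_lookup() from this series; the helper name is made up, the match test is simplified (the real code scores saddr/daddr/dif as well and mixes the netns into the hash), and the real lookup additionally pins the socket and re-checks it, as sketched after the udplite.c diff.

	/* Illustrative only: lockless bucket walk over a nulls-terminated
	 * chain, restarting if the walk drifted onto another chain. */
	static struct sock *udp_lookup_sketch(struct udp_table *udptable,
					      __be16 dport)
	{
		struct sock *sk;
		struct hlist_nulls_node *node;
		unsigned int hash = ntohs(dport) & (UDP_HTABLE_SIZE - 1);
		struct udp_hslot *hslot = &udptable->hash[hash];

		rcu_read_lock();
	begin:
		sk_nulls_for_each_rcu(sk, node, &hslot->head) {
			if (inet_sk(sk)->num == ntohs(dport))	/* simplified match */
				goto found;
		}
		/* INIT_HLIST_NULLS_HEAD() encoded the bucket index into the
		 * chain's nulls terminator.  With SLAB_DESTROY_BY_RCU an
		 * object can be recycled mid-walk and splice us onto another
		 * chain, so if we ended on the wrong terminator, restart. */
		if (get_nulls_value(node) != hash)
			goto begin;
		sk = NULL;
	found:
		rcu_read_unlock();
		return sk;
	}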
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 2e9bad2fa1bc..9f4a6165f722 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h | |||
@@ -5,8 +5,8 @@ | |||
5 | #include <net/protocol.h> | 5 | #include <net/protocol.h> |
6 | #include <net/inet_common.h> | 6 | #include <net/inet_common.h> |
7 | 7 | ||
8 | extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); | 8 | extern int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int ); |
9 | extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); | 9 | extern void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); |
10 | 10 | ||
11 | extern int udp_v4_get_port(struct sock *sk, unsigned short snum); | 11 | extern int udp_v4_get_port(struct sock *sk, unsigned short snum); |
12 | 12 | ||
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 3c807964da96..c784891cb7e5 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c | |||
@@ -12,16 +12,17 @@ | |||
12 | */ | 12 | */ |
13 | #include "udp_impl.h" | 13 | #include "udp_impl.h" |
14 | 14 | ||
15 | struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; | 15 | struct udp_table udplite_table; |
16 | EXPORT_SYMBOL(udplite_table); | ||
16 | 17 | ||
17 | static int udplite_rcv(struct sk_buff *skb) | 18 | static int udplite_rcv(struct sk_buff *skb) |
18 | { | 19 | { |
19 | return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); | 20 | return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); |
20 | } | 21 | } |
21 | 22 | ||
22 | static void udplite_err(struct sk_buff *skb, u32 info) | 23 | static void udplite_err(struct sk_buff *skb, u32 info) |
23 | { | 24 | { |
24 | __udp4_lib_err(skb, info, udplite_hash); | 25 | __udp4_lib_err(skb, info, &udplite_table); |
25 | } | 26 | } |
26 | 27 | ||
27 | static struct net_protocol udplite_protocol = { | 28 | static struct net_protocol udplite_protocol = { |
@@ -50,7 +51,8 @@ struct proto udplite_prot = { | |||
50 | .unhash = udp_lib_unhash, | 51 | .unhash = udp_lib_unhash, |
51 | .get_port = udp_v4_get_port, | 52 | .get_port = udp_v4_get_port, |
52 | .obj_size = sizeof(struct udp_sock), | 53 | .obj_size = sizeof(struct udp_sock), |
53 | .h.udp_hash = udplite_hash, | 54 | .slab_flags = SLAB_DESTROY_BY_RCU, |
55 | .h.udp_table = &udplite_table, | ||
54 | #ifdef CONFIG_COMPAT | 56 | #ifdef CONFIG_COMPAT |
55 | .compat_setsockopt = compat_udp_setsockopt, | 57 | .compat_setsockopt = compat_udp_setsockopt, |
56 | .compat_getsockopt = compat_udp_getsockopt, | 58 | .compat_getsockopt = compat_udp_getsockopt, |
@@ -71,7 +73,7 @@ static struct inet_protosw udplite4_protosw = { | |||
71 | static struct udp_seq_afinfo udplite4_seq_afinfo = { | 73 | static struct udp_seq_afinfo udplite4_seq_afinfo = { |
72 | .name = "udplite", | 74 | .name = "udplite", |
73 | .family = AF_INET, | 75 | .family = AF_INET, |
74 | .hashtable = udplite_hash, | 76 | .udp_table = &udplite_table, |
75 | .seq_fops = { | 77 | .seq_fops = { |
76 | .owner = THIS_MODULE, | 78 | .owner = THIS_MODULE, |
77 | }, | 79 | }, |
@@ -108,6 +110,7 @@ static inline int udplite4_proc_init(void) | |||
108 | 110 | ||
109 | void __init udplite4_register(void) | 111 | void __init udplite4_register(void) |
110 | { | 112 | { |
113 | udp_table_init(&udplite_table); | ||
111 | if (proto_register(&udplite_prot, 1)) | 114 | if (proto_register(&udplite_prot, 1)) |
112 | goto out_register_err; | 115 | goto out_register_err; |
113 | 116 | ||
@@ -126,5 +129,4 @@ out_register_err: | |||
126 | printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); | 129 | printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); |
127 | } | 130 | } |
128 | 131 | ||
129 | EXPORT_SYMBOL(udplite_hash); | ||
130 | EXPORT_SYMBOL(udplite_prot); | 132 | EXPORT_SYMBOL(udplite_prot); |
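
UDP-Lite gets the same treatment as UDP proper: its private hlist becomes a struct udp_table initialized via udp_table_init(), and the proto now sets SLAB_DESTROY_BY_RCU. That flag permits a freed udp_sock's memory to be recycled for a new socket without waiting for an RCU grace period, which is what forces lookups to pin a candidate and then re-validate it. The helper below is hypothetical, but its two checks mirror what the series' lookup path does:

	/* Sketch of the validation SLAB_DESTROY_BY_RCU requires: the object
	 * may have been freed and reused as a different socket at any point
	 * during the RCU walk. */
	static struct sock *pin_and_recheck(struct sock *sk, __be16 dport)
	{
		if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
			return NULL;	/* refcnt hit zero: being freed, retry walk */
		if (unlikely(inet_sk(sk)->dport != dport)) {
			sock_put(sk);	/* slab reused the object: not our socket */
			return NULL;
		}
		return sk;
	}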
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 390dcb1354a5..4ec2162a437e 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c | |||
@@ -78,7 +78,6 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) | |||
78 | struct udphdr *uh; | 78 | struct udphdr *uh; |
79 | struct iphdr *iph; | 79 | struct iphdr *iph; |
80 | int iphlen, len; | 80 | int iphlen, len; |
81 | int ret; | ||
82 | 81 | ||
83 | __u8 *udpdata; | 82 | __u8 *udpdata; |
84 | __be32 *udpdata32; | 83 | __be32 *udpdata32; |
@@ -152,8 +151,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) | |||
152 | skb_reset_transport_header(skb); | 151 | skb_reset_transport_header(skb); |
153 | 152 | ||
154 | /* process ESP */ | 153 | /* process ESP */ |
155 | ret = xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); | 154 | return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); |
156 | return ret; | ||
157 | 155 | ||
158 | drop: | 156 | drop: |
159 | kfree_skb(skb); | 157 | kfree_skb(skb); |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c63de0a72aba..2ad24ba31f9d 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -18,7 +18,8 @@ | |||
18 | static struct dst_ops xfrm4_dst_ops; | 18 | static struct dst_ops xfrm4_dst_ops; |
19 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; | 19 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; |
20 | 20 | ||
21 | static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr, | 21 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, |
22 | xfrm_address_t *saddr, | ||
22 | xfrm_address_t *daddr) | 23 | xfrm_address_t *daddr) |
23 | { | 24 | { |
24 | struct flowi fl = { | 25 | struct flowi fl = { |
@@ -36,19 +37,20 @@ static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr, | |||
36 | if (saddr) | 37 | if (saddr) |
37 | fl.fl4_src = saddr->a4; | 38 | fl.fl4_src = saddr->a4; |
38 | 39 | ||
39 | err = __ip_route_output_key(&init_net, &rt, &fl); | 40 | err = __ip_route_output_key(net, &rt, &fl); |
40 | dst = &rt->u.dst; | 41 | dst = &rt->u.dst; |
41 | if (err) | 42 | if (err) |
42 | dst = ERR_PTR(err); | 43 | dst = ERR_PTR(err); |
43 | return dst; | 44 | return dst; |
44 | } | 45 | } |
45 | 46 | ||
46 | static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) | 47 | static int xfrm4_get_saddr(struct net *net, |
48 | xfrm_address_t *saddr, xfrm_address_t *daddr) | ||
47 | { | 49 | { |
48 | struct dst_entry *dst; | 50 | struct dst_entry *dst; |
49 | struct rtable *rt; | 51 | struct rtable *rt; |
50 | 52 | ||
51 | dst = xfrm4_dst_lookup(0, NULL, daddr); | 53 | dst = xfrm4_dst_lookup(net, 0, NULL, daddr); |
52 | if (IS_ERR(dst)) | 54 | if (IS_ERR(dst)) |
53 | return -EHOSTUNREACH; | 55 | return -EHOSTUNREACH; |
54 | 56 | ||
@@ -65,7 +67,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) | |||
65 | 67 | ||
66 | read_lock_bh(&policy->lock); | 68 | read_lock_bh(&policy->lock); |
67 | for (dst = policy->bundles; dst; dst = dst->next) { | 69 | for (dst = policy->bundles; dst; dst = dst->next) { |
68 | struct xfrm_dst *xdst = (struct xfrm_dst*)dst; | 70 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; |
69 | if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/ | 71 | if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/ |
70 | xdst->u.rt.fl.fl4_dst == fl->fl4_dst && | 72 | xdst->u.rt.fl.fl4_dst == fl->fl4_dst && |
71 | xdst->u.rt.fl.fl4_src == fl->fl4_src && | 73 | xdst->u.rt.fl.fl4_src == fl->fl4_src && |
@@ -187,7 +189,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
187 | 189 | ||
188 | static inline int xfrm4_garbage_collect(struct dst_ops *ops) | 190 | static inline int xfrm4_garbage_collect(struct dst_ops *ops) |
189 | { | 191 | { |
190 | xfrm4_policy_afinfo.garbage_collect(); | 192 | xfrm4_policy_afinfo.garbage_collect(&init_net); |
191 | return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); | 193 | return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); |
192 | } | 194 | } |
193 | 195 | ||
@@ -246,7 +248,6 @@ static struct dst_ops xfrm4_dst_ops = { | |||
246 | .ifdown = xfrm4_dst_ifdown, | 248 | .ifdown = xfrm4_dst_ifdown, |
247 | .local_out = __ip_local_out, | 249 | .local_out = __ip_local_out, |
248 | .gc_thresh = 1024, | 250 | .gc_thresh = 1024, |
249 | .entry_size = sizeof(struct xfrm_dst), | ||
250 | .entries = ATOMIC_INIT(0), | 251 | .entries = ATOMIC_INIT(0), |
251 | }; | 252 | }; |
252 | 253 | ||
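
The xfrm4_policy.c hunks thread a struct net through xfrm4_dst_lookup() and xfrm4_get_saddr(), so IPsec route and source-address lookups resolve against the caller's namespace rather than being hardwired to init_net (garbage collection alone keeps the explicit &init_net above). A minimal sketch of the resulting pattern, with an illustrative helper name; only the struct net handed in changes which routing table __ip_route_output_key() consults:

	/* Namespace-scoped IPv4 route lookup, mirroring xfrm4_dst_lookup(). */
	static struct rtable *route_in_netns(struct net *net, __be32 daddr, int tos)
	{
		struct rtable *rt;
		struct flowi fl = {
			.nl_u = { .ip4_u = { .daddr = daddr, .tos = tos } },
		};

		if (__ip_route_output_key(net, &rt, &fl))
			return NULL;	/* no route in this namespace */
		return rt;
	}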
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 55dc6beab9aa..1ef1366a0a03 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c | |||
@@ -13,8 +13,6 @@ | |||
13 | #include <linux/ipsec.h> | 13 | #include <linux/ipsec.h> |
14 | #include <linux/netfilter_ipv4.h> | 14 | #include <linux/netfilter_ipv4.h> |
15 | 15 | ||
16 | static struct xfrm_state_afinfo xfrm4_state_afinfo; | ||
17 | |||
18 | static int xfrm4_init_flags(struct xfrm_state *x) | 16 | static int xfrm4_init_flags(struct xfrm_state *x) |
19 | { | 17 | { |
20 | if (ipv4_config.no_pmtu_disc) | 18 | if (ipv4_config.no_pmtu_disc) |