aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2006-07-31 20:37:25 -0400
committerPaul Mackerras <paulus@samba.org>2006-07-31 20:37:25 -0400
commit57cad8084e0837e0f2c97da789ec9b3f36809be9 (patch)
treee9c790afb4286f78cb08d9664f58baa7e876fe55 /net/ipv4
parentcb18bd40030c879cd93fef02fd579f74dbab473d (diff)
parent49b1e3ea19b1c95c2f012b8331ffb3b169e4c042 (diff)
Merge branch 'merge'
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig10
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c36
-rw-r--r--net/ipv4/ah4.c4
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/esp4.c4
-rw-r--r--net/ipv4/fib_hash.c6
-rw-r--r--net/ipv4/fib_rules.c7
-rw-r--r--net/ipv4/fib_semantics.c15
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv4/igmp.c12
-rw-r--r--net/ipv4/inet_diag.c3
-rw-r--r--net/ipv4/inetpeer.c2
-rw-r--r--net/ipv4/ip_gre.c1
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv4/ip_options.c1
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/ipcomp.c6
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/ipmr.c21
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c10
-rw-r--r--net/ipv4/ipvs/ip_vs_est.c3
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_h323.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c4
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c4
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c3
-rw-r--r--net/ipv4/raw.c1
-rw-r--r--net/ipv4/tcp_compound.c448
-rw-r--r--net/ipv4/tcp_highspeed.c13
-rw-r--r--net/ipv4/tcp_ipv4.c21
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c1
-rw-r--r--net/ipv4/xfrm4_output.c2
34 files changed, 124 insertions, 539 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index da33393be45f..8514106761b0 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -572,16 +572,6 @@ config TCP_CONG_VENO
572 loss packets. 572 loss packets.
573 See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf 573 See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
574 574
575config TCP_CONG_COMPOUND
576 tristate "TCP Compound"
577 depends on EXPERIMENTAL
578 default n
579 ---help---
580 TCP Compound is a sender-side only change to TCP that uses
581 a mixed Reno/Vegas approach to calculate the cwnd.
582 For further details look here:
583 ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf
584
585endmenu 575endmenu
586 576
587config TCP_CONG_BIC 577config TCP_CONG_BIC
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 38b8039bdd55..4878fc5be85f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -47,7 +47,6 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
47obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o 47obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
48obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o 48obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
49obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o 49obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
50obj-$(CONFIG_TCP_CONG_COMPOUND) += tcp_compound.o
51 50
52obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ 51obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
53 xfrm4_output.o 52 xfrm4_output.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 318d4674faa1..c84a32070f8d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1097,6 +1097,40 @@ int inet_sk_rebuild_header(struct sock *sk)
1097 1097
1098EXPORT_SYMBOL(inet_sk_rebuild_header); 1098EXPORT_SYMBOL(inet_sk_rebuild_header);
1099 1099
1100static int inet_gso_send_check(struct sk_buff *skb)
1101{
1102 struct iphdr *iph;
1103 struct net_protocol *ops;
1104 int proto;
1105 int ihl;
1106 int err = -EINVAL;
1107
1108 if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
1109 goto out;
1110
1111 iph = skb->nh.iph;
1112 ihl = iph->ihl * 4;
1113 if (ihl < sizeof(*iph))
1114 goto out;
1115
1116 if (unlikely(!pskb_may_pull(skb, ihl)))
1117 goto out;
1118
1119 skb->h.raw = __skb_pull(skb, ihl);
1120 iph = skb->nh.iph;
1121 proto = iph->protocol & (MAX_INET_PROTOS - 1);
1122 err = -EPROTONOSUPPORT;
1123
1124 rcu_read_lock();
1125 ops = rcu_dereference(inet_protos[proto]);
1126 if (likely(ops && ops->gso_send_check))
1127 err = ops->gso_send_check(skb);
1128 rcu_read_unlock();
1129
1130out:
1131 return err;
1132}
1133
1100static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) 1134static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
1101{ 1135{
1102 struct sk_buff *segs = ERR_PTR(-EINVAL); 1136 struct sk_buff *segs = ERR_PTR(-EINVAL);
@@ -1162,6 +1196,7 @@ static struct net_protocol igmp_protocol = {
1162static struct net_protocol tcp_protocol = { 1196static struct net_protocol tcp_protocol = {
1163 .handler = tcp_v4_rcv, 1197 .handler = tcp_v4_rcv,
1164 .err_handler = tcp_v4_err, 1198 .err_handler = tcp_v4_err,
1199 .gso_send_check = tcp_v4_gso_send_check,
1165 .gso_segment = tcp_tso_segment, 1200 .gso_segment = tcp_tso_segment,
1166 .no_policy = 1, 1201 .no_policy = 1,
1167}; 1202};
@@ -1208,6 +1243,7 @@ static int ipv4_proc_init(void);
1208static struct packet_type ip_packet_type = { 1243static struct packet_type ip_packet_type = {
1209 .type = __constant_htons(ETH_P_IP), 1244 .type = __constant_htons(ETH_P_IP),
1210 .func = ip_rcv, 1245 .func = ip_rcv,
1246 .gso_send_check = inet_gso_send_check,
1211 .gso_segment = inet_gso_segment, 1247 .gso_segment = inet_gso_segment,
1212}; 1248};
1213 1249
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 8e748be36c5a..1366bc6ce6a5 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -215,12 +215,10 @@ static int ah_init_state(struct xfrm_state *x)
215 if (x->encap) 215 if (x->encap)
216 goto error; 216 goto error;
217 217
218 ahp = kmalloc(sizeof(*ahp), GFP_KERNEL); 218 ahp = kzalloc(sizeof(*ahp), GFP_KERNEL);
219 if (ahp == NULL) 219 if (ahp == NULL)
220 return -ENOMEM; 220 return -ENOMEM;
221 221
222 memset(ahp, 0, sizeof(*ahp));
223
224 ahp->key = x->aalg->alg_key; 222 ahp->key = x->aalg->alg_key;
225 ahp->key_len = (x->aalg->alg_key_len+7)/8; 223 ahp->key_len = (x->aalg->alg_key_len+7)/8;
226 ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); 224 ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7b51b3bdb548..c8a3723bc001 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1372,12 +1372,11 @@ static int arp_seq_open(struct inode *inode, struct file *file)
1372{ 1372{
1373 struct seq_file *seq; 1373 struct seq_file *seq;
1374 int rc = -ENOMEM; 1374 int rc = -ENOMEM;
1375 struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL); 1375 struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
1376 1376
1377 if (!s) 1377 if (!s)
1378 goto out; 1378 goto out;
1379 1379
1380 memset(s, 0, sizeof(*s));
1381 rc = seq_open(file, &arp_seq_ops); 1380 rc = seq_open(file, &arp_seq_ops);
1382 if (rc) 1381 if (rc)
1383 goto out_kfree; 1382 goto out_kfree;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a7c65e9e5ec9..a6cc31d911eb 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -93,10 +93,9 @@ static void devinet_sysctl_unregister(struct ipv4_devconf *p);
93 93
94static struct in_ifaddr *inet_alloc_ifa(void) 94static struct in_ifaddr *inet_alloc_ifa(void)
95{ 95{
96 struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL); 96 struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
97 97
98 if (ifa) { 98 if (ifa) {
99 memset(ifa, 0, sizeof(*ifa));
100 INIT_RCU_HEAD(&ifa->rcu_head); 99 INIT_RCU_HEAD(&ifa->rcu_head);
101 } 100 }
102 101
@@ -140,10 +139,9 @@ struct in_device *inetdev_init(struct net_device *dev)
140 139
141 ASSERT_RTNL(); 140 ASSERT_RTNL();
142 141
143 in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL); 142 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
144 if (!in_dev) 143 if (!in_dev)
145 goto out; 144 goto out;
146 memset(in_dev, 0, sizeof(*in_dev));
147 INIT_RCU_HEAD(&in_dev->rcu_head); 145 INIT_RCU_HEAD(&in_dev->rcu_head);
148 memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf)); 146 memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
149 in_dev->cnf.sysctl = NULL; 147 in_dev->cnf.sysctl = NULL;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4e112738b3fa..fc2f8ce441de 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -316,12 +316,10 @@ static int esp_init_state(struct xfrm_state *x)
316 if (x->ealg == NULL) 316 if (x->ealg == NULL)
317 goto error; 317 goto error;
318 318
319 esp = kmalloc(sizeof(*esp), GFP_KERNEL); 319 esp = kzalloc(sizeof(*esp), GFP_KERNEL);
320 if (esp == NULL) 320 if (esp == NULL)
321 return -ENOMEM; 321 return -ENOMEM;
322 322
323 memset(esp, 0, sizeof(*esp));
324
325 if (x->aalg) { 323 if (x->aalg) {
326 struct xfrm_algo_desc *aalg_desc; 324 struct xfrm_algo_desc *aalg_desc;
327 325
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 3c1d32ad35f2..72c633b357cf 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -204,11 +204,10 @@ static struct fn_zone *
204fn_new_zone(struct fn_hash *table, int z) 204fn_new_zone(struct fn_hash *table, int z)
205{ 205{
206 int i; 206 int i;
207 struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL); 207 struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
208 if (!fz) 208 if (!fz)
209 return NULL; 209 return NULL;
210 210
211 memset(fz, 0, sizeof(struct fn_zone));
212 if (z) { 211 if (z) {
213 fz->fz_divisor = 16; 212 fz->fz_divisor = 16;
214 } else { 213 } else {
@@ -1046,7 +1045,7 @@ static int fib_seq_open(struct inode *inode, struct file *file)
1046{ 1045{
1047 struct seq_file *seq; 1046 struct seq_file *seq;
1048 int rc = -ENOMEM; 1047 int rc = -ENOMEM;
1049 struct fib_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); 1048 struct fib_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
1050 1049
1051 if (!s) 1050 if (!s)
1052 goto out; 1051 goto out;
@@ -1057,7 +1056,6 @@ static int fib_seq_open(struct inode *inode, struct file *file)
1057 1056
1058 seq = file->private_data; 1057 seq = file->private_data;
1059 seq->private = s; 1058 seq->private = s;
1060 memset(s, 0, sizeof(*s));
1061out: 1059out:
1062 return rc; 1060 return rc;
1063out_kfree: 1061out_kfree:
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 6c642d11d4ca..79b04718bdfd 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -196,10 +196,9 @@ int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
196 } 196 }
197 } 197 }
198 198
199 new_r = kmalloc(sizeof(*new_r), GFP_KERNEL); 199 new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
200 if (!new_r) 200 if (!new_r)
201 return -ENOMEM; 201 return -ENOMEM;
202 memset(new_r, 0, sizeof(*new_r));
203 202
204 if (rta[RTA_SRC-1]) 203 if (rta[RTA_SRC-1])
205 memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4); 204 memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
@@ -457,13 +456,13 @@ int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
457 456
458 rcu_read_lock(); 457 rcu_read_lock();
459 hlist_for_each_entry(r, node, &fib_rules, hlist) { 458 hlist_for_each_entry(r, node, &fib_rules, hlist) {
460
461 if (idx < s_idx) 459 if (idx < s_idx)
462 continue; 460 goto next;
463 if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid, 461 if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid,
464 cb->nlh->nlmsg_seq, 462 cb->nlh->nlmsg_seq,
465 RTM_NEWRULE, NLM_F_MULTI) < 0) 463 RTM_NEWRULE, NLM_F_MULTI) < 0)
466 break; 464 break;
465next:
467 idx++; 466 idx++;
468 } 467 }
469 rcu_read_unlock(); 468 rcu_read_unlock();
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5f87533684d5..9be53a8e72c3 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -709,11 +709,10 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
709 goto failure; 709 goto failure;
710 } 710 }
711 711
712 fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 712 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
713 if (fi == NULL) 713 if (fi == NULL)
714 goto failure; 714 goto failure;
715 fib_info_cnt++; 715 fib_info_cnt++;
716 memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
717 716
718 fi->fib_protocol = r->rtm_protocol; 717 fi->fib_protocol = r->rtm_protocol;
719 718
@@ -962,10 +961,6 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
962 rtm->rtm_protocol = fi->fib_protocol; 961 rtm->rtm_protocol = fi->fib_protocol;
963 if (fi->fib_priority) 962 if (fi->fib_priority)
964 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); 963 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
965#ifdef CONFIG_NET_CLS_ROUTE
966 if (fi->fib_nh[0].nh_tclassid)
967 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
968#endif
969 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 964 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
970 goto rtattr_failure; 965 goto rtattr_failure;
971 if (fi->fib_prefsrc) 966 if (fi->fib_prefsrc)
@@ -975,6 +970,10 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
975 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); 970 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
976 if (fi->fib_nh->nh_oif) 971 if (fi->fib_nh->nh_oif)
977 RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); 972 RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
973#ifdef CONFIG_NET_CLS_ROUTE
974 if (fi->fib_nh[0].nh_tclassid)
975 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
976#endif
978 } 977 }
979#ifdef CONFIG_IP_ROUTE_MULTIPATH 978#ifdef CONFIG_IP_ROUTE_MULTIPATH
980 if (fi->fib_nhs > 1) { 979 if (fi->fib_nhs > 1) {
@@ -993,6 +992,10 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
993 nhp->rtnh_ifindex = nh->nh_oif; 992 nhp->rtnh_ifindex = nh->nh_oif;
994 if (nh->nh_gw) 993 if (nh->nh_gw)
995 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); 994 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
995#ifdef CONFIG_NET_CLS_ROUTE
996 if (nh->nh_tclassid)
997 RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid);
998#endif
996 nhp->rtnh_len = skb->tail - (unsigned char*)nhp; 999 nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
997 } endfor_nexthops(fi); 1000 } endfor_nexthops(fi);
998 mp_head->rta_type = RTA_MULTIPATH; 1001 mp_head->rta_type = RTA_MULTIPATH;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1cb65305e102..23fb9d9768e3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1252,8 +1252,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1252 */ 1252 */
1253 1253
1254 if (!fa_head) { 1254 if (!fa_head) {
1255 fa_head = fib_insert_node(t, &err, key, plen);
1256 err = 0; 1255 err = 0;
1256 fa_head = fib_insert_node(t, &err, key, plen);
1257 if (err) 1257 if (err)
1258 goto out_free_new_fa; 1258 goto out_free_new_fa;
1259 } 1259 }
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d299c8e547d6..9f4b752f5a33 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1028,10 +1028,9 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
1028 * for deleted items allows change reports to use common code with 1028 * for deleted items allows change reports to use common code with
1029 * non-deleted or query-response MCA's. 1029 * non-deleted or query-response MCA's.
1030 */ 1030 */
1031 pmc = kmalloc(sizeof(*pmc), GFP_KERNEL); 1031 pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
1032 if (!pmc) 1032 if (!pmc)
1033 return; 1033 return;
1034 memset(pmc, 0, sizeof(*pmc));
1035 spin_lock_bh(&im->lock); 1034 spin_lock_bh(&im->lock);
1036 pmc->interface = im->interface; 1035 pmc->interface = im->interface;
1037 in_dev_hold(in_dev); 1036 in_dev_hold(in_dev);
@@ -1529,10 +1528,9 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
1529 psf_prev = psf; 1528 psf_prev = psf;
1530 } 1529 }
1531 if (!psf) { 1530 if (!psf) {
1532 psf = kmalloc(sizeof(*psf), GFP_ATOMIC); 1531 psf = kzalloc(sizeof(*psf), GFP_ATOMIC);
1533 if (!psf) 1532 if (!psf)
1534 return -ENOBUFS; 1533 return -ENOBUFS;
1535 memset(psf, 0, sizeof(*psf));
1536 psf->sf_inaddr = *psfsrc; 1534 psf->sf_inaddr = *psfsrc;
1537 if (psf_prev) { 1535 if (psf_prev) {
1538 psf_prev->sf_next = psf; 1536 psf_prev->sf_next = psf;
@@ -2380,7 +2378,7 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file)
2380{ 2378{
2381 struct seq_file *seq; 2379 struct seq_file *seq;
2382 int rc = -ENOMEM; 2380 int rc = -ENOMEM;
2383 struct igmp_mc_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); 2381 struct igmp_mc_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
2384 2382
2385 if (!s) 2383 if (!s)
2386 goto out; 2384 goto out;
@@ -2390,7 +2388,6 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file)
2390 2388
2391 seq = file->private_data; 2389 seq = file->private_data;
2392 seq->private = s; 2390 seq->private = s;
2393 memset(s, 0, sizeof(*s));
2394out: 2391out:
2395 return rc; 2392 return rc;
2396out_kfree: 2393out_kfree:
@@ -2555,7 +2552,7 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
2555{ 2552{
2556 struct seq_file *seq; 2553 struct seq_file *seq;
2557 int rc = -ENOMEM; 2554 int rc = -ENOMEM;
2558 struct igmp_mcf_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); 2555 struct igmp_mcf_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
2559 2556
2560 if (!s) 2557 if (!s)
2561 goto out; 2558 goto out;
@@ -2565,7 +2562,6 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
2565 2562
2566 seq = file->private_data; 2563 seq = file->private_data;
2567 seq->private = s; 2564 seq->private = s;
2568 memset(s, 0, sizeof(*s));
2569out: 2565out:
2570 return rc; 2566 return rc;
2571out_kfree: 2567out_kfree:
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8e7e41b66c79..492858e6faf0 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -909,11 +909,10 @@ static int __init inet_diag_init(void)
909 sizeof(struct inet_diag_handler *)); 909 sizeof(struct inet_diag_handler *));
910 int err = -ENOMEM; 910 int err = -ENOMEM;
911 911
912 inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL); 912 inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL);
913 if (!inet_diag_table) 913 if (!inet_diag_table)
914 goto out; 914 goto out;
915 915
916 memset(inet_diag_table, 0, inet_diag_table_size);
917 idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, 916 idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
918 THIS_MODULE); 917 THIS_MODULE);
919 if (idiagnl == NULL) 918 if (idiagnl == NULL)
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 2160874ce7aa..03ff62ebcfeb 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -86,7 +86,7 @@ static struct inet_peer *peer_root = peer_avl_empty;
86static DEFINE_RWLOCK(peer_pool_lock); 86static DEFINE_RWLOCK(peer_pool_lock);
87#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ 87#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
88 88
89static volatile int peer_total; 89static int peer_total;
90/* Exported for sysctl_net_ipv4. */ 90/* Exported for sysctl_net_ipv4. */
91int inet_peer_threshold = 65536 + 128; /* start to throw entries more 91int inet_peer_threshold = 65536 + 128; /* start to throw entries more
92 * aggressively at this stage */ 92 * aggressively at this stage */
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6ff9b10d9563..0f9b3a31997b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -617,7 +617,6 @@ static int ipgre_rcv(struct sk_buff *skb)
617 skb->mac.raw = skb->nh.raw; 617 skb->mac.raw = skb->nh.raw;
618 skb->nh.raw = __pskb_pull(skb, offset); 618 skb->nh.raw = __pskb_pull(skb, offset);
619 skb_postpull_rcsum(skb, skb->h.raw, offset); 619 skb_postpull_rcsum(skb, skb->h.raw, offset);
620 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
621 skb->pkt_type = PACKET_HOST; 620 skb->pkt_type = PACKET_HOST;
622#ifdef CONFIG_NET_IPGRE_BROADCAST 621#ifdef CONFIG_NET_IPGRE_BROADCAST
623 if (MULTICAST(iph->daddr)) { 622 if (MULTICAST(iph->daddr)) {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e1a7dba2fa8a..212734ca238f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -428,6 +428,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
428 goto drop; 428 goto drop;
429 } 429 }
430 430
431 /* Remove any debris in the socket control block */
432 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
433
431 return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, 434 return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
432 ip_rcv_finish); 435 ip_rcv_finish);
433 436
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index cbcae6544622..406056edc02b 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -256,7 +256,6 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
256 256
257 if (!opt) { 257 if (!opt) {
258 opt = &(IPCB(skb)->opt); 258 opt = &(IPCB(skb)->opt);
259 memset(opt, 0, sizeof(struct ip_options));
260 iph = skb->nh.raw; 259 iph = skb->nh.raw;
261 opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr); 260 opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
262 optptr = iph + sizeof(struct iphdr); 261 optptr = iph + sizeof(struct iphdr);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ca0e714613fb..7c9f9a6421b8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -209,7 +209,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
209 return dst_output(skb); 209 return dst_output(skb);
210 } 210 }
211#endif 211#endif
212 if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) 212 if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
213 return ip_fragment(skb, ip_finish_output2); 213 return ip_fragment(skb, ip_finish_output2);
214 else 214 else
215 return ip_finish_output2(skb); 215 return ip_finish_output2(skb);
@@ -1095,7 +1095,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1095 while (size > 0) { 1095 while (size > 0) {
1096 int i; 1096 int i;
1097 1097
1098 if (skb_shinfo(skb)->gso_size) 1098 if (skb_is_gso(skb))
1099 len = size; 1099 len = size;
1100 else { 1100 else {
1101 1101
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 8e0374847532..a0c28b2b756e 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -70,7 +70,8 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
70 if (err) 70 if (err)
71 goto out; 71 goto out;
72 72
73 skb_put(skb, dlen - plen); 73 skb->truesize += dlen - plen;
74 __skb_put(skb, dlen - plen);
74 memcpy(skb->data, scratch, dlen); 75 memcpy(skb->data, scratch, dlen);
75out: 76out:
76 put_cpu(); 77 put_cpu();
@@ -409,11 +410,10 @@ static int ipcomp_init_state(struct xfrm_state *x)
409 goto out; 410 goto out;
410 411
411 err = -ENOMEM; 412 err = -ENOMEM;
412 ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL); 413 ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
413 if (!ipcd) 414 if (!ipcd)
414 goto out; 415 goto out;
415 416
416 memset(ipcd, 0, sizeof(*ipcd));
417 x->props.header_len = 0; 417 x->props.header_len = 0;
418 if (x->props.mode) 418 if (x->props.mode)
419 x->props.header_len += sizeof(struct iphdr); 419 x->props.header_len += sizeof(struct iphdr);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 3291d5192aad..76ab50b0d6ef 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -487,7 +487,6 @@ static int ipip_rcv(struct sk_buff *skb)
487 487
488 skb->mac.raw = skb->nh.raw; 488 skb->mac.raw = skb->nh.raw;
489 skb->nh.raw = skb->data; 489 skb->nh.raw = skb->data;
490 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
491 skb->protocol = htons(ETH_P_IP); 490 skb->protocol = htons(ETH_P_IP);
492 skb->pkt_type = PACKET_HOST; 491 skb->pkt_type = PACKET_HOST;
493 492
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ba33f8621c67..85893eef6b16 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1461,7 +1461,6 @@ int pim_rcv_v1(struct sk_buff * skb)
1461 skb_pull(skb, (u8*)encap - skb->data); 1461 skb_pull(skb, (u8*)encap - skb->data);
1462 skb->nh.iph = (struct iphdr *)skb->data; 1462 skb->nh.iph = (struct iphdr *)skb->data;
1463 skb->dev = reg_dev; 1463 skb->dev = reg_dev;
1464 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1465 skb->protocol = htons(ETH_P_IP); 1464 skb->protocol = htons(ETH_P_IP);
1466 skb->ip_summed = 0; 1465 skb->ip_summed = 0;
1467 skb->pkt_type = PACKET_HOST; 1466 skb->pkt_type = PACKET_HOST;
@@ -1517,7 +1516,6 @@ static int pim_rcv(struct sk_buff * skb)
1517 skb_pull(skb, (u8*)encap - skb->data); 1516 skb_pull(skb, (u8*)encap - skb->data);
1518 skb->nh.iph = (struct iphdr *)skb->data; 1517 skb->nh.iph = (struct iphdr *)skb->data;
1519 skb->dev = reg_dev; 1518 skb->dev = reg_dev;
1520 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1521 skb->protocol = htons(ETH_P_IP); 1519 skb->protocol = htons(ETH_P_IP);
1522 skb->ip_summed = 0; 1520 skb->ip_summed = 0;
1523 skb->pkt_type = PACKET_HOST; 1521 skb->pkt_type = PACKET_HOST;
@@ -1580,6 +1578,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1580 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); 1578 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1581 1579
1582 if (cache==NULL) { 1580 if (cache==NULL) {
1581 struct sk_buff *skb2;
1583 struct net_device *dev; 1582 struct net_device *dev;
1584 int vif; 1583 int vif;
1585 1584
@@ -1593,12 +1592,18 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1593 read_unlock(&mrt_lock); 1592 read_unlock(&mrt_lock);
1594 return -ENODEV; 1593 return -ENODEV;
1595 } 1594 }
1596 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 1595 skb2 = skb_clone(skb, GFP_ATOMIC);
1597 skb->nh.iph->ihl = sizeof(struct iphdr)>>2; 1596 if (!skb2) {
1598 skb->nh.iph->saddr = rt->rt_src; 1597 read_unlock(&mrt_lock);
1599 skb->nh.iph->daddr = rt->rt_dst; 1598 return -ENOMEM;
1600 skb->nh.iph->version = 0; 1599 }
1601 err = ipmr_cache_unresolved(vif, skb); 1600
1601 skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr));
1602 skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
1603 skb2->nh.iph->saddr = rt->rt_src;
1604 skb2->nh.iph->daddr = rt->rt_dst;
1605 skb2->nh.iph->version = 0;
1606 err = ipmr_cache_unresolved(vif, skb2);
1602 read_unlock(&mrt_lock); 1607 read_unlock(&mrt_lock);
1603 return err; 1608 return err;
1604 } 1609 }
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index f28ec6882162..6a28fafe910c 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -735,12 +735,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
735 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 735 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
736 return -EINVAL; 736 return -EINVAL;
737 737
738 dest = kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); 738 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
739 if (dest == NULL) { 739 if (dest == NULL) {
740 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n"); 740 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
741 return -ENOMEM; 741 return -ENOMEM;
742 } 742 }
743 memset(dest, 0, sizeof(struct ip_vs_dest));
744 743
745 dest->protocol = svc->protocol; 744 dest->protocol = svc->protocol;
746 dest->vaddr = svc->addr; 745 dest->vaddr = svc->addr;
@@ -1050,14 +1049,12 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1050 goto out_mod_dec; 1049 goto out_mod_dec;
1051 } 1050 }
1052 1051
1053 svc = (struct ip_vs_service *) 1052 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1054 kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1055 if (svc == NULL) { 1053 if (svc == NULL) {
1056 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n"); 1054 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1057 ret = -ENOMEM; 1055 ret = -ENOMEM;
1058 goto out_err; 1056 goto out_err;
1059 } 1057 }
1060 memset(svc, 0, sizeof(struct ip_vs_service));
1061 1058
1062 /* I'm the first user of the service */ 1059 /* I'm the first user of the service */
1063 atomic_set(&svc->usecnt, 1); 1060 atomic_set(&svc->usecnt, 1);
@@ -1797,7 +1794,7 @@ static int ip_vs_info_open(struct inode *inode, struct file *file)
1797{ 1794{
1798 struct seq_file *seq; 1795 struct seq_file *seq;
1799 int rc = -ENOMEM; 1796 int rc = -ENOMEM;
1800 struct ip_vs_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); 1797 struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL);
1801 1798
1802 if (!s) 1799 if (!s)
1803 goto out; 1800 goto out;
@@ -1808,7 +1805,6 @@ static int ip_vs_info_open(struct inode *inode, struct file *file)
1808 1805
1809 seq = file->private_data; 1806 seq = file->private_data;
1810 seq->private = s; 1807 seq->private = s;
1811 memset(s, 0, sizeof(*s));
1812out: 1808out:
1813 return rc; 1809 return rc;
1814out_kfree: 1810out_kfree:
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 4c1940381ba0..7d68b80c4c19 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -123,11 +123,10 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats)
123{ 123{
124 struct ip_vs_estimator *est; 124 struct ip_vs_estimator *est;
125 125
126 est = kmalloc(sizeof(*est), GFP_KERNEL); 126 est = kzalloc(sizeof(*est), GFP_KERNEL);
127 if (est == NULL) 127 if (est == NULL)
128 return -ENOMEM; 128 return -ENOMEM;
129 129
130 memset(est, 0, sizeof(*est));
131 est->stats = stats; 130 est->stats = stats;
132 est->last_conns = stats->conns; 131 est->last_conns = stats->conns;
133 est->cps = stats->cps<<10; 132 est->cps = stats->cps<<10;
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
index af35235672d5..9a39e2969712 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
@@ -1200,7 +1200,7 @@ static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
1200 tuple.dst.protonum = IPPROTO_TCP; 1200 tuple.dst.protonum = IPPROTO_TCP;
1201 1201
1202 exp = __ip_conntrack_expect_find(&tuple); 1202 exp = __ip_conntrack_expect_find(&tuple);
1203 if (exp->master == ct) 1203 if (exp && exp->master == ct)
1204 return exp; 1204 return exp;
1205 return NULL; 1205 return NULL;
1206} 1206}
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 7bd3c22003a2..7a9fa04a467a 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -534,6 +534,8 @@ static struct nf_hook_ops ip_conntrack_ops[] = {
534 534
535/* Sysctl support */ 535/* Sysctl support */
536 536
537int ip_conntrack_checksum = 1;
538
537#ifdef CONFIG_SYSCTL 539#ifdef CONFIG_SYSCTL
538 540
539/* From ip_conntrack_core.c */ 541/* From ip_conntrack_core.c */
@@ -568,8 +570,6 @@ extern unsigned int ip_ct_generic_timeout;
568static int log_invalid_proto_min = 0; 570static int log_invalid_proto_min = 0;
569static int log_invalid_proto_max = 255; 571static int log_invalid_proto_max = 255;
570 572
571int ip_conntrack_checksum = 1;
572
573static struct ctl_table_header *ip_ct_sysctl_header; 573static struct ctl_table_header *ip_ct_sysctl_header;
574 574
575static ctl_table ip_ct_sysctl_table[] = { 575static ctl_table ip_ct_sysctl_table[] = {
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 0b1b416759cc..18b7fbdccb61 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -1255,9 +1255,9 @@ static int help(struct sk_buff **pskb,
1255 struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); 1255 struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
1256 1256
1257 /* SNMP replies and originating SNMP traps get mangled */ 1257 /* SNMP replies and originating SNMP traps get mangled */
1258 if (udph->source == ntohs(SNMP_PORT) && dir != IP_CT_DIR_REPLY) 1258 if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
1259 return NF_ACCEPT; 1259 return NF_ACCEPT;
1260 if (udph->dest == ntohs(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) 1260 if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
1261 return NF_ACCEPT; 1261 return NF_ACCEPT;
1262 1262
1263 /* No NAT? */ 1263 /* No NAT? */
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index cbffeae3f565..d994c5f5744c 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -172,11 +172,10 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
172 struct clusterip_config *c; 172 struct clusterip_config *c;
173 char buffer[16]; 173 char buffer[16];
174 174
175 c = kmalloc(sizeof(*c), GFP_ATOMIC); 175 c = kzalloc(sizeof(*c), GFP_ATOMIC);
176 if (!c) 176 if (!c)
177 return NULL; 177 return NULL;
178 178
179 memset(c, 0, sizeof(*c));
180 c->dev = dev; 179 c->dev = dev;
181 c->clusterip = ip; 180 c->clusterip = ip;
182 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); 181 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bd221ec3f81e..62b2762a2420 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -609,6 +609,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
609 if (sin) { 609 if (sin) {
610 sin->sin_family = AF_INET; 610 sin->sin_family = AF_INET;
611 sin->sin_addr.s_addr = skb->nh.iph->saddr; 611 sin->sin_addr.s_addr = skb->nh.iph->saddr;
612 sin->sin_port = 0;
612 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 613 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
613 } 614 }
614 if (inet->cmsg_flags) 615 if (inet->cmsg_flags)
diff --git a/net/ipv4/tcp_compound.c b/net/ipv4/tcp_compound.c
deleted file mode 100644
index bc54f7e9aea9..000000000000
--- a/net/ipv4/tcp_compound.c
+++ /dev/null
@@ -1,448 +0,0 @@
1/*
2 * TCP Vegas congestion control
3 *
4 * This is based on the congestion detection/avoidance scheme described in
5 * Lawrence S. Brakmo and Larry L. Peterson.
6 * "TCP Vegas: End to end congestion avoidance on a global internet."
7 * IEEE Journal on Selected Areas in Communication, 13(8):1465--1480,
8 * October 1995. Available from:
9 * ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps
10 *
11 * See http://www.cs.arizona.edu/xkernel/ for their implementation.
12 * The main aspects that distinguish this implementation from the
13 * Arizona Vegas implementation are:
14 * o We do not change the loss detection or recovery mechanisms of
15 * Linux in any way. Linux already recovers from losses quite well,
16 * using fine-grained timers, NewReno, and FACK.
17 * o To avoid the performance penalty imposed by increasing cwnd
18 * only every-other RTT during slow start, we increase during
19 * every RTT during slow start, just like Reno.
20 * o Largely to allow continuous cwnd growth during slow start,
21 * we use the rate at which ACKs come back as the "actual"
22 * rate, rather than the rate at which data is sent.
23 * o To speed convergence to the right rate, we set the cwnd
24 * to achieve the right ("actual") rate when we exit slow start.
25 * o To filter out the noise caused by delayed ACKs, we use the
26 * minimum RTT sample observed during the last RTT to calculate
27 * the actual rate.
28 * o When the sender re-starts from idle, it waits until it has
29 * received ACKs for an entire flight of new data before making
30 * a cwnd adjustment decision. The original Vegas implementation
31 * assumed senders never went idle.
32 *
33 *
34 * TCP Compound based on TCP Vegas
35 *
36 * further details can be found here:
37 * ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf
38 */
39
40#include <linux/config.h>
41#include <linux/mm.h>
42#include <linux/module.h>
43#include <linux/skbuff.h>
44#include <linux/inet_diag.h>
45
46#include <net/tcp.h>
47
48/* Default values of the Vegas variables, in fixed-point representation
49 * with V_PARAM_SHIFT bits to the right of the binary point.
50 */
51#define V_PARAM_SHIFT 1
52
53#define TCP_COMPOUND_ALPHA 3U
54#define TCP_COMPOUND_BETA 1U
55#define TCP_COMPOUND_GAMMA 30
56#define TCP_COMPOUND_ZETA 1
57
58/* TCP compound variables */
59struct compound {
60 u32 beg_snd_nxt; /* right edge during last RTT */
61 u32 beg_snd_una; /* left edge during last RTT */
62 u32 beg_snd_cwnd; /* saves the size of the cwnd */
63 u8 doing_vegas_now; /* if true, do vegas for this RTT */
64 u16 cntRTT; /* # of RTTs measured within last RTT */
65 u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
66 u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
67
68 u32 cwnd;
69 u32 dwnd;
70};
71
72/* There are several situations when we must "re-start" Vegas:
73 *
74 * o when a connection is established
75 * o after an RTO
76 * o after fast recovery
77 * o when we send a packet and there is no outstanding
78 * unacknowledged data (restarting an idle connection)
79 *
80 * In these circumstances we cannot do a Vegas calculation at the
81 * end of the first RTT, because any calculation we do is using
82 * stale info -- both the saved cwnd and congestion feedback are
83 * stale.
84 *
85 * Instead we must wait until the completion of an RTT during
86 * which we actually receive ACKs.
87 */
88static inline void vegas_enable(struct sock *sk)
89{
90 const struct tcp_sock *tp = tcp_sk(sk);
91 struct compound *vegas = inet_csk_ca(sk);
92
93 /* Begin taking Vegas samples next time we send something. */
94 vegas->doing_vegas_now = 1;
95
96 /* Set the beginning of the next send window. */
97 vegas->beg_snd_nxt = tp->snd_nxt;
98
99 vegas->cntRTT = 0;
100 vegas->minRTT = 0x7fffffff;
101}
102
103/* Stop taking Vegas samples for now. */
104static inline void vegas_disable(struct sock *sk)
105{
106 struct compound *vegas = inet_csk_ca(sk);
107
108 vegas->doing_vegas_now = 0;
109}
110
111static void tcp_compound_init(struct sock *sk)
112{
113 struct compound *vegas = inet_csk_ca(sk);
114 const struct tcp_sock *tp = tcp_sk(sk);
115
116 vegas->baseRTT = 0x7fffffff;
117 vegas_enable(sk);
118
119 vegas->dwnd = 0;
120 vegas->cwnd = tp->snd_cwnd;
121}
122
123/* Do RTT sampling needed for Vegas.
124 * Basically we:
125 * o min-filter RTT samples from within an RTT to get the current
126 * propagation delay + queuing delay (we are min-filtering to try to
127 * avoid the effects of delayed ACKs)
128 * o min-filter RTT samples from a much longer window (forever for now)
129 * to find the propagation delay (baseRTT)
130 */
131static void tcp_compound_rtt_calc(struct sock *sk, u32 usrtt)
132{
133 struct compound *vegas = inet_csk_ca(sk);
134 u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
135
136 /* Filter to find propagation delay: */
137 if (vrtt < vegas->baseRTT)
138 vegas->baseRTT = vrtt;
139
140 /* Find the min RTT during the last RTT to find
141 * the current prop. delay + queuing delay:
142 */
143
144 vegas->minRTT = min(vegas->minRTT, vrtt);
145 vegas->cntRTT++;
146}
147
148static void tcp_compound_state(struct sock *sk, u8 ca_state)
149{
150
151 if (ca_state == TCP_CA_Open)
152 vegas_enable(sk);
153 else
154 vegas_disable(sk);
155}
156
157
158/* 64bit divisor, dividend and result. dynamic precision */
159static inline u64 div64_64(u64 dividend, u64 divisor)
160{
161 u32 d = divisor;
162
163 if (divisor > 0xffffffffULL) {
164 unsigned int shift = fls(divisor >> 32);
165
166 d = divisor >> shift;
167 dividend >>= shift;
168 }
169
170 /* avoid 64 bit division if possible */
171 if (dividend >> 32)
172 do_div(dividend, d);
173 else
174 dividend = (u32) dividend / d;
175
176 return dividend;
177}
178
179/* calculate the quartic root of "a" using Newton-Raphson */
180static u32 qroot(u64 a)
181{
182 u32 x, x1;
183
184 /* Initial estimate is based on:
185 * qrt(x) = exp(log(x) / 4)
186 */
187 x = 1u << (fls64(a) >> 2);
188
189 /*
190 * Iteration based on:
191 * 3
192 * x = ( 3 * x + a / x ) / 4
193 * k+1 k k
194 */
195 do {
196 u64 x3 = x;
197
198 x1 = x;
199 x3 *= x;
200 x3 *= x;
201
202 x = (3 * x + (u32) div64_64(a, x3)) / 4;
203 } while (abs(x1 - x) > 1);
204
205 return x;
206}
207
208
209/*
210 * If the connection is idle and we are restarting,
211 * then we don't want to do any Vegas calculations
212 * until we get fresh RTT samples. So when we
213 * restart, we reset our Vegas state to a clean
214 * slate. After we get acks for this flight of
215 * packets, _then_ we can make Vegas calculations
216 * again.
217 */
218static void tcp_compound_cwnd_event(struct sock *sk, enum tcp_ca_event event)
219{
220 if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START)
221 tcp_compound_init(sk);
222}
223
224static void tcp_compound_cong_avoid(struct sock *sk, u32 ack,
225 u32 seq_rtt, u32 in_flight, int flag)
226{
227 struct tcp_sock *tp = tcp_sk(sk);
228 struct compound *vegas = inet_csk_ca(sk);
229 u8 inc = 0;
230
231 if (vegas->cwnd + vegas->dwnd > tp->snd_cwnd) {
232 if (vegas->cwnd > tp->snd_cwnd || vegas->dwnd > tp->snd_cwnd) {
233 vegas->cwnd = tp->snd_cwnd;
234 vegas->dwnd = 0;
235 } else
236 vegas->cwnd = tp->snd_cwnd - vegas->dwnd;
237
238 }
239
240 if (!tcp_is_cwnd_limited(sk, in_flight))
241 return;
242
243 if (vegas->cwnd <= tp->snd_ssthresh)
244 inc = 1;
245 else if (tp->snd_cwnd_cnt < tp->snd_cwnd)
246 tp->snd_cwnd_cnt++;
247
248 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
249 inc = 1;
250 tp->snd_cwnd_cnt = 0;
251 }
252
253 if (inc && tp->snd_cwnd < tp->snd_cwnd_clamp)
254 vegas->cwnd++;
255
256 /* The key players are v_beg_snd_una and v_beg_snd_nxt.
257 *
258 * These are so named because they represent the approximate values
259 * of snd_una and snd_nxt at the beginning of the current RTT. More
260 * precisely, they represent the amount of data sent during the RTT.
261 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
262 * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
263 * bytes of data have been ACKed during the course of the RTT, giving
264 * an "actual" rate of:
265 *
266 * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
267 *
268 * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
269 * because delayed ACKs can cover more than one segment, so they
270 * don't line up nicely with the boundaries of RTTs.
271 *
272 * Another unfortunate fact of life is that delayed ACKs delay the
273 * advance of the left edge of our send window, so that the number
274 * of bytes we send in an RTT is often less than our cwnd will allow.
275 * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
276 */
277
278 if (after(ack, vegas->beg_snd_nxt)) {
279 /* Do the Vegas once-per-RTT cwnd adjustment. */
280 u32 old_wnd, old_snd_cwnd;
281
282 /* Here old_wnd is essentially the window of data that was
283 * sent during the previous RTT, and has all
284 * been acknowledged in the course of the RTT that ended
285 * with the ACK we just received. Likewise, old_snd_cwnd
286 * is the cwnd during the previous RTT.
287 */
288 if (!tp->mss_cache)
289 return;
290
291 old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) /
292 tp->mss_cache;
293 old_snd_cwnd = vegas->beg_snd_cwnd;
294
295 /* Save the extent of the current window so we can use this
296 * at the end of the next RTT.
297 */
298 vegas->beg_snd_una = vegas->beg_snd_nxt;
299 vegas->beg_snd_nxt = tp->snd_nxt;
300 vegas->beg_snd_cwnd = tp->snd_cwnd;
301
302 /* We do the Vegas calculations only if we got enough RTT
303 * samples that we can be reasonably sure that we got
304 * at least one RTT sample that wasn't from a delayed ACK.
305 * If we only had 2 samples total,
306 * then that means we're getting only 1 ACK per RTT, which
307 * means they're almost certainly delayed ACKs.
308 * If we have 3 samples, we should be OK.
309 */
310
311 if (vegas->cntRTT > 2) {
312 u32 rtt, target_cwnd, diff;
313 u32 brtt, dwnd;
314
315 /* We have enough RTT samples, so, using the Vegas
316 * algorithm, we determine if we should increase or
317 * decrease cwnd, and by how much.
318 */
319
320 /* Pluck out the RTT we are using for the Vegas
321 * calculations. This is the min RTT seen during the
322 * last RTT. Taking the min filters out the effects
323 * of delayed ACKs, at the cost of noticing congestion
324 * a bit later.
325 */
326 rtt = vegas->minRTT;
327
328 /* Calculate the cwnd we should have, if we weren't
329 * going too fast.
330 *
331 * This is:
332 * (actual rate in segments) * baseRTT
333 * We keep it as a fixed point number with
334 * V_PARAM_SHIFT bits to the right of the binary point.
335 */
336 if (!rtt)
337 return;
338
339 brtt = vegas->baseRTT;
340 target_cwnd = ((old_wnd * brtt)
341 << V_PARAM_SHIFT) / rtt;
342
343 /* Calculate the difference between the window we had,
344 * and the window we would like to have. This quantity
345 * is the "Diff" from the Arizona Vegas papers.
346 *
347 * Again, this is a fixed point number with
348 * V_PARAM_SHIFT bits to the right of the binary
349 * point.
350 */
351
352 diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
353
354 dwnd = vegas->dwnd;
355
356 if (diff < (TCP_COMPOUND_GAMMA << V_PARAM_SHIFT)) {
357 u64 v;
358 u32 x;
359
360 /*
361 * The TCP Compound paper describes the choice
362 * of "k" determines the agressiveness,
363 * ie. slope of the response function.
364 *
365 * For same value as HSTCP would be 0.8
366 * but for computaional reasons, both the
367 * original authors and this implementation
368 * use 0.75.
369 */
370 v = old_wnd;
371 x = qroot(v * v * v) >> TCP_COMPOUND_ALPHA;
372 if (x > 1)
373 dwnd = x - 1;
374 else
375 dwnd = 0;
376
377 dwnd += vegas->dwnd;
378
379 } else if ((dwnd << V_PARAM_SHIFT) <
380 (diff * TCP_COMPOUND_BETA))
381 dwnd = 0;
382 else
383 dwnd =
384 ((dwnd << V_PARAM_SHIFT) -
385 (diff *
386 TCP_COMPOUND_BETA)) >> V_PARAM_SHIFT;
387
388 vegas->dwnd = dwnd;
389
390 }
391
392 /* Wipe the slate clean for the next RTT. */
393 vegas->cntRTT = 0;
394 vegas->minRTT = 0x7fffffff;
395 }
396
397 tp->snd_cwnd = vegas->cwnd + vegas->dwnd;
398}
399
400/* Extract info for Tcp socket info provided via netlink. */
401static void tcp_compound_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
402{
403 const struct compound *ca = inet_csk_ca(sk);
404 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
405 struct tcpvegas_info *info;
406
407 info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
408 sizeof(*info)));
409
410 info->tcpv_enabled = ca->doing_vegas_now;
411 info->tcpv_rttcnt = ca->cntRTT;
412 info->tcpv_rtt = ca->baseRTT;
413 info->tcpv_minrtt = ca->minRTT;
414 rtattr_failure:;
415 }
416}
417
418static struct tcp_congestion_ops tcp_compound = {
419 .init = tcp_compound_init,
420 .ssthresh = tcp_reno_ssthresh,
421 .cong_avoid = tcp_compound_cong_avoid,
422 .rtt_sample = tcp_compound_rtt_calc,
423 .set_state = tcp_compound_state,
424 .cwnd_event = tcp_compound_cwnd_event,
425 .get_info = tcp_compound_get_info,
426
427 .owner = THIS_MODULE,
428 .name = "compound",
429};
430
431static int __init tcp_compound_register(void)
432{
433 BUG_ON(sizeof(struct compound) > ICSK_CA_PRIV_SIZE);
434 tcp_register_congestion_control(&tcp_compound);
435 return 0;
436}
437
438static void __exit tcp_compound_unregister(void)
439{
440 tcp_unregister_congestion_control(&tcp_compound);
441}
442
443module_init(tcp_compound_register);
444module_exit(tcp_compound_unregister);
445
446MODULE_AUTHOR("Angelo P. Castellani, Stephen Hemminger");
447MODULE_LICENSE("GPL");
448MODULE_DESCRIPTION("TCP Compound");
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index aaa1538c0692..fa3e1aad660c 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -139,14 +139,19 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
139 tp->snd_cwnd++; 139 tp->snd_cwnd++;
140 } 140 }
141 } else { 141 } else {
142 /* Update AIMD parameters */ 142 /* Update AIMD parameters.
143 *
144 * We want to guarantee that:
145 * hstcp_aimd_vals[ca->ai-1].cwnd <
146 * snd_cwnd <=
147 * hstcp_aimd_vals[ca->ai].cwnd
148 */
143 if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) { 149 if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
144 while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && 150 while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
145 ca->ai < HSTCP_AIMD_MAX - 1) 151 ca->ai < HSTCP_AIMD_MAX - 1)
146 ca->ai++; 152 ca->ai++;
147 } else if (tp->snd_cwnd < hstcp_aimd_vals[ca->ai].cwnd) { 153 } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) {
148 while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && 154 while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd)
149 ca->ai > 0)
150 ca->ai--; 155 ca->ai--;
151 } 156 }
152 157
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5a886e6efbbe..f6f39e814291 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -496,6 +496,24 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
496 } 496 }
497} 497}
498 498
499int tcp_v4_gso_send_check(struct sk_buff *skb)
500{
501 struct iphdr *iph;
502 struct tcphdr *th;
503
504 if (!pskb_may_pull(skb, sizeof(*th)))
505 return -EINVAL;
506
507 iph = skb->nh.iph;
508 th = skb->h.th;
509
510 th->check = 0;
511 th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
512 skb->csum = offsetof(struct tcphdr, check);
513 skb->ip_summed = CHECKSUM_HW;
514 return 0;
515}
516
499/* 517/*
500 * This routine will send an RST to the other tcp. 518 * This routine will send an RST to the other tcp.
501 * 519 *
@@ -1622,10 +1640,9 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
1622 if (unlikely(afinfo == NULL)) 1640 if (unlikely(afinfo == NULL))
1623 return -EINVAL; 1641 return -EINVAL;
1624 1642
1625 s = kmalloc(sizeof(*s), GFP_KERNEL); 1643 s = kzalloc(sizeof(*s), GFP_KERNEL);
1626 if (!s) 1644 if (!s)
1627 return -ENOMEM; 1645 return -ENOMEM;
1628 memset(s, 0, sizeof(*s));
1629 s->family = afinfo->family; 1646 s->family = afinfo->family;
1630 s->seq_ops.start = tcp_seq_start; 1647 s->seq_ops.start = tcp_seq_start;
1631 s->seq_ops.next = tcp_seq_next; 1648 s->seq_ops.next = tcp_seq_next;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9bfcddad695b..f136cec96d95 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1468,11 +1468,10 @@ static int udp_seq_open(struct inode *inode, struct file *file)
1468 struct udp_seq_afinfo *afinfo = PDE(inode)->data; 1468 struct udp_seq_afinfo *afinfo = PDE(inode)->data;
1469 struct seq_file *seq; 1469 struct seq_file *seq;
1470 int rc = -ENOMEM; 1470 int rc = -ENOMEM;
1471 struct udp_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); 1471 struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
1472 1472
1473 if (!s) 1473 if (!s)
1474 goto out; 1474 goto out;
1475 memset(s, 0, sizeof(*s));
1476 s->family = afinfo->family; 1475 s->family = afinfo->family;
1477 s->seq_ops.start = udp_seq_start; 1476 s->seq_ops.start = udp_seq_start;
1478 s->seq_ops.next = udp_seq_next; 1477 s->seq_ops.next = udp_seq_next;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index f8d880beb12f..13cafbe56ce3 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -92,7 +92,6 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
92 skb->mac.raw = memmove(skb->data - skb->mac_len, 92 skb->mac.raw = memmove(skb->data - skb->mac_len,
93 skb->mac.raw, skb->mac_len); 93 skb->mac.raw, skb->mac_len);
94 skb->nh.raw = skb->data; 94 skb->nh.raw = skb->data;
95 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
96 err = 0; 95 err = 0;
97 96
98out: 97out:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 193363e22932..d16f863cf687 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -134,7 +134,7 @@ static int xfrm4_output_finish(struct sk_buff *skb)
134 } 134 }
135#endif 135#endif
136 136
137 if (!skb_shinfo(skb)->gso_size) 137 if (!skb_is_gso(skb))
138 return xfrm4_output_finish2(skb); 138 return xfrm4_output_finish2(skb);
139 139
140 skb->protocol = htons(ETH_P_IP); 140 skb->protocol = htons(ETH_P_IP);