aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bridge/netfilter/ebt_mark.c21
-rw-r--r--net/core/neighbour.c12
-rw-r--r--net/core/skbuff.c3
-rw-r--r--net/ipv4/Kconfig9
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/esp4.c26
-rw-r--r--net/ipv4/ipcomp.c5
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c10
-rw-r--r--net/ipv4/netfilter.c9
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c3
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c97
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c3
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/xfrm4_mode_beet.c139
-rw-r--r--net/ipv6/Kconfig10
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/ipcomp6.c5
-rw-r--r--net/ipv6/udp.c64
-rw-r--r--net/ipv6/xfrm6_mode_beet.c107
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/sched/estimator.c196
-rw-r--r--net/sched/sch_htb.c2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c67
-rw-r--r--net/sunrpc/svc.c79
-rw-r--r--net/sunrpc/svcauth_unix.c47
-rw-r--r--net/sunrpc/svcsock.c51
-rw-r--r--net/tipc/link.c5
-rw-r--r--net/xfrm/xfrm_hash.h7
-rw-r--r--net/xfrm/xfrm_policy.c7
-rw-r--r--net/xfrm/xfrm_state.c16
-rw-r--r--net/xfrm/xfrm_user.c1
32 files changed, 577 insertions, 432 deletions
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 770c0df972a3..b54306a934e5 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -22,24 +22,37 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
22 const void *data, unsigned int datalen) 22 const void *data, unsigned int datalen)
23{ 23{
24 struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; 24 struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
25 int action = info->target & -16;
25 26
26 if ((*pskb)->nfmark != info->mark) 27 if (action == MARK_SET_VALUE)
27 (*pskb)->nfmark = info->mark; 28 (*pskb)->nfmark = info->mark;
29 else if (action == MARK_OR_VALUE)
30 (*pskb)->nfmark |= info->mark;
31 else if (action == MARK_AND_VALUE)
32 (*pskb)->nfmark &= info->mark;
33 else
34 (*pskb)->nfmark ^= info->mark;
28 35
29 return info->target; 36 return info->target | -16;
30} 37}
31 38
32static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, 39static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
33 const struct ebt_entry *e, void *data, unsigned int datalen) 40 const struct ebt_entry *e, void *data, unsigned int datalen)
34{ 41{
35 struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; 42 struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
43 int tmp;
36 44
37 if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info))) 45 if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info)))
38 return -EINVAL; 46 return -EINVAL;
39 if (BASE_CHAIN && info->target == EBT_RETURN) 47 tmp = info->target | -16;
48 if (BASE_CHAIN && tmp == EBT_RETURN)
40 return -EINVAL; 49 return -EINVAL;
41 CLEAR_BASE_CHAIN_BIT; 50 CLEAR_BASE_CHAIN_BIT;
42 if (INVALID_TARGET) 51 if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
52 return -EINVAL;
53 tmp = info->target & -16;
54 if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
55 tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE)
43 return -EINVAL; 56 return -EINVAL;
44 return 0; 57 return 0;
45} 58}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8ce8c471d868..b4b478353b27 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -344,12 +344,12 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
344{ 344{
345 struct neighbour *n; 345 struct neighbour *n;
346 int key_len = tbl->key_len; 346 int key_len = tbl->key_len;
347 u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; 347 u32 hash_val = tbl->hash(pkey, dev);
348 348
349 NEIGH_CACHE_STAT_INC(tbl, lookups); 349 NEIGH_CACHE_STAT_INC(tbl, lookups);
350 350
351 read_lock_bh(&tbl->lock); 351 read_lock_bh(&tbl->lock);
352 for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { 352 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
353 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { 353 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
354 neigh_hold(n); 354 neigh_hold(n);
355 NEIGH_CACHE_STAT_INC(tbl, hits); 355 NEIGH_CACHE_STAT_INC(tbl, hits);
@@ -364,12 +364,12 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
364{ 364{
365 struct neighbour *n; 365 struct neighbour *n;
366 int key_len = tbl->key_len; 366 int key_len = tbl->key_len;
367 u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask; 367 u32 hash_val = tbl->hash(pkey, NULL);
368 368
369 NEIGH_CACHE_STAT_INC(tbl, lookups); 369 NEIGH_CACHE_STAT_INC(tbl, lookups);
370 370
371 read_lock_bh(&tbl->lock); 371 read_lock_bh(&tbl->lock);
372 for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { 372 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
373 if (!memcmp(n->primary_key, pkey, key_len)) { 373 if (!memcmp(n->primary_key, pkey, key_len)) {
374 neigh_hold(n); 374 neigh_hold(n);
375 NEIGH_CACHE_STAT_INC(tbl, hits); 375 NEIGH_CACHE_STAT_INC(tbl, hits);
@@ -1998,12 +1998,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
1998 int rc, h, s_h = cb->args[1]; 1998 int rc, h, s_h = cb->args[1];
1999 int idx, s_idx = idx = cb->args[2]; 1999 int idx, s_idx = idx = cb->args[2];
2000 2000
2001 read_lock_bh(&tbl->lock);
2001 for (h = 0; h <= tbl->hash_mask; h++) { 2002 for (h = 0; h <= tbl->hash_mask; h++) {
2002 if (h < s_h) 2003 if (h < s_h)
2003 continue; 2004 continue;
2004 if (h > s_h) 2005 if (h > s_h)
2005 s_idx = 0; 2006 s_idx = 0;
2006 read_lock_bh(&tbl->lock);
2007 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) { 2007 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
2008 if (idx < s_idx) 2008 if (idx < s_idx)
2009 continue; 2009 continue;
@@ -2016,8 +2016,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2016 goto out; 2016 goto out;
2017 } 2017 }
2018 } 2018 }
2019 read_unlock_bh(&tbl->lock);
2020 } 2019 }
2020 read_unlock_bh(&tbl->lock);
2021 rc = skb->len; 2021 rc = skb->len;
2022out: 2022out:
2023 cb->args[1] = h; 2023 cb->args[1] = h;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c448c7f6fde2..3c23760c5827 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -156,7 +156,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
156 156
157 /* Get the DATA. Size must match skb_add_mtu(). */ 157 /* Get the DATA. Size must match skb_add_mtu(). */
158 size = SKB_DATA_ALIGN(size); 158 size = SKB_DATA_ALIGN(size);
159 data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 159 data = kmalloc_track_caller(size + sizeof(struct skb_shared_info),
160 gfp_mask);
160 if (!data) 161 if (!data)
161 goto nodata; 162 goto nodata;
162 163
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index d172a9804448..5572071af735 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -434,6 +434,15 @@ config INET_XFRM_MODE_TUNNEL
434 434
435 If unsure, say Y. 435 If unsure, say Y.
436 436
437config INET_XFRM_MODE_BEET
438 tristate "IP: IPsec BEET mode"
439 default y
440 select XFRM
441 ---help---
442 Support for IPsec BEET mode.
443
444 If unsure, say Y.
445
437config INET_DIAG 446config INET_DIAG
438 tristate "INET: socket monitoring interface" 447 tristate "INET: socket monitoring interface"
439 default y 448 default y
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f66049e28aeb..15645c51520c 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_INET_AH) += ah4.o
23obj-$(CONFIG_INET_ESP) += esp4.o 23obj-$(CONFIG_INET_ESP) += esp4.o
24obj-$(CONFIG_INET_IPCOMP) += ipcomp.o 24obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
25obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o 25obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
26obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
26obj-$(CONFIG_INET_TUNNEL) += tunnel4.o 27obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
27obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o 28obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
28obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o 29obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 13b29360d102..b5c205b57669 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -253,7 +253,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
253 * as per draft-ietf-ipsec-udp-encaps-06, 253 * as per draft-ietf-ipsec-udp-encaps-06,
254 * section 3.1.2 254 * section 3.1.2
255 */ 255 */
256 if (x->props.mode == XFRM_MODE_TRANSPORT) 256 if (x->props.mode == XFRM_MODE_TRANSPORT ||
257 x->props.mode == XFRM_MODE_BEET)
257 skb->ip_summed = CHECKSUM_UNNECESSARY; 258 skb->ip_summed = CHECKSUM_UNNECESSARY;
258 } 259 }
259 260
@@ -271,17 +272,28 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
271{ 272{
272 struct esp_data *esp = x->data; 273 struct esp_data *esp = x->data;
273 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); 274 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
274 275 int enclen = 0;
275 if (x->props.mode == XFRM_MODE_TUNNEL) { 276
276 mtu = ALIGN(mtu + 2, blksize); 277 switch (x->props.mode) {
277 } else { 278 case XFRM_MODE_TUNNEL:
278 /* The worst case. */ 279 mtu = ALIGN(mtu +2, blksize);
280 break;
281 default:
282 case XFRM_MODE_TRANSPORT:
283 /* The worst case */
279 mtu = ALIGN(mtu + 2, 4) + blksize - 4; 284 mtu = ALIGN(mtu + 2, 4) + blksize - 4;
285 break;
286 case XFRM_MODE_BEET:
287 /* The worst case. */
288 enclen = IPV4_BEET_PHMAXLEN;
289 mtu = ALIGN(mtu + enclen + 2, blksize);
290 break;
280 } 291 }
292
281 if (esp->conf.padlen) 293 if (esp->conf.padlen)
282 mtu = ALIGN(mtu, esp->conf.padlen); 294 mtu = ALIGN(mtu, esp->conf.padlen);
283 295
284 return mtu + x->props.header_len + esp->auth.icv_trunc_len; 296 return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen;
285} 297}
286 298
287static void esp4_err(struct sk_buff *skb, u32 info) 299static void esp4_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 2017d36024d4..3839b706142e 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -206,6 +206,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
206static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) 206static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
207{ 207{
208 struct xfrm_state *t; 208 struct xfrm_state *t;
209 u8 mode = XFRM_MODE_TUNNEL;
209 210
210 t = xfrm_state_alloc(); 211 t = xfrm_state_alloc();
211 if (t == NULL) 212 if (t == NULL)
@@ -216,7 +217,9 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
216 t->id.daddr.a4 = x->id.daddr.a4; 217 t->id.daddr.a4 = x->id.daddr.a4;
217 memcpy(&t->sel, &x->sel, sizeof(t->sel)); 218 memcpy(&t->sel, &x->sel, sizeof(t->sel));
218 t->props.family = AF_INET; 219 t->props.family = AF_INET;
219 t->props.mode = XFRM_MODE_TUNNEL; 220 if (x->props.mode == XFRM_MODE_BEET)
221 mode = x->props.mode;
222 t->props.mode = mode;
220 t->props.saddr.a4 = x->props.saddr.a4; 223 t->props.saddr.a4 = x->props.saddr.a4;
221 t->props.flags = x->props.flags; 224 t->props.flags = x->props.flags;
222 225
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 6dee03935f78..1445bb47fea4 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -813,6 +813,16 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
813 skb->nh.iph->saddr = cp->vaddr; 813 skb->nh.iph->saddr = cp->vaddr;
814 ip_send_check(skb->nh.iph); 814 ip_send_check(skb->nh.iph);
815 815
816 /* For policy routing, packets originating from this
817 * machine itself may be routed differently to packets
818 * passing through. We want this packet to be routed as
819 * if it came from this machine itself. So re-compute
820 * the routing information.
821 */
822 if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
823 goto drop;
824 skb = *pskb;
825
816 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); 826 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
817 827
818 ip_vs_out_stats(cp, skb); 828 ip_vs_out_stats(cp, skb);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 5ac15379a0cf..e2005c6810a4 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -8,7 +8,7 @@
8#include <net/ip.h> 8#include <net/ip.h>
9 9
10/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ 10/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
11int ip_route_me_harder(struct sk_buff **pskb) 11int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
12{ 12{
13 struct iphdr *iph = (*pskb)->nh.iph; 13 struct iphdr *iph = (*pskb)->nh.iph;
14 struct rtable *rt; 14 struct rtable *rt;
@@ -16,10 +16,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
16 struct dst_entry *odst; 16 struct dst_entry *odst;
17 unsigned int hh_len; 17 unsigned int hh_len;
18 18
19 if (addr_type == RTN_UNSPEC)
20 addr_type = inet_addr_type(iph->saddr);
21
19 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause 22 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
20 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. 23 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
21 */ 24 */
22 if (inet_addr_type(iph->saddr) == RTN_LOCAL) { 25 if (addr_type == RTN_LOCAL) {
23 fl.nl_u.ip4_u.daddr = iph->daddr; 26 fl.nl_u.ip4_u.daddr = iph->daddr;
24 fl.nl_u.ip4_u.saddr = iph->saddr; 27 fl.nl_u.ip4_u.saddr = iph->saddr;
25 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); 28 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
@@ -156,7 +159,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
156 if (!(iph->tos == rt_info->tos 159 if (!(iph->tos == rt_info->tos
157 && iph->daddr == rt_info->daddr 160 && iph->daddr == rt_info->daddr
158 && iph->saddr == rt_info->saddr)) 161 && iph->saddr == rt_info->saddr))
159 return ip_route_me_harder(pskb); 162 return ip_route_me_harder(pskb, RTN_UNSPEC);
160 } 163 }
161 return 0; 164 return 0;
162} 165}
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 021395b67463..d85d2de50449 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -265,7 +265,8 @@ ip_nat_local_fn(unsigned int hooknum,
265 ct->tuplehash[!dir].tuple.src.u.all 265 ct->tuplehash[!dir].tuple.src.u.all
266#endif 266#endif
267 ) 267 )
268 return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; 268 if (ip_route_me_harder(pskb, RTN_UNSPEC))
269 ret = NF_DROP;
269 } 270 }
270 return ret; 271 return ret;
271} 272}
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index fd0c05efed8a..ad0312d0e4fd 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -38,76 +38,16 @@ MODULE_DESCRIPTION("iptables REJECT target module");
38#define DEBUGP(format, args...) 38#define DEBUGP(format, args...)
39#endif 39#endif
40 40
41static inline struct rtable *route_reverse(struct sk_buff *skb,
42 struct tcphdr *tcph, int hook)
43{
44 struct iphdr *iph = skb->nh.iph;
45 struct dst_entry *odst;
46 struct flowi fl = {};
47 struct rtable *rt;
48
49 /* We don't require ip forwarding to be enabled to be able to
50 * send a RST reply for bridged traffic. */
51 if (hook != NF_IP_FORWARD
52#ifdef CONFIG_BRIDGE_NETFILTER
53 || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED)
54#endif
55 ) {
56 fl.nl_u.ip4_u.daddr = iph->saddr;
57 if (hook == NF_IP_LOCAL_IN)
58 fl.nl_u.ip4_u.saddr = iph->daddr;
59 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
60
61 if (ip_route_output_key(&rt, &fl) != 0)
62 return NULL;
63 } else {
64 /* non-local src, find valid iif to satisfy
65 * rp-filter when calling ip_route_input. */
66 fl.nl_u.ip4_u.daddr = iph->daddr;
67 if (ip_route_output_key(&rt, &fl) != 0)
68 return NULL;
69
70 odst = skb->dst;
71 if (ip_route_input(skb, iph->saddr, iph->daddr,
72 RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
73 dst_release(&rt->u.dst);
74 return NULL;
75 }
76 dst_release(&rt->u.dst);
77 rt = (struct rtable *)skb->dst;
78 skb->dst = odst;
79
80 fl.nl_u.ip4_u.daddr = iph->saddr;
81 fl.nl_u.ip4_u.saddr = iph->daddr;
82 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
83 }
84
85 if (rt->u.dst.error) {
86 dst_release(&rt->u.dst);
87 return NULL;
88 }
89
90 fl.proto = IPPROTO_TCP;
91 fl.fl_ip_sport = tcph->dest;
92 fl.fl_ip_dport = tcph->source;
93 security_skb_classify_flow(skb, &fl);
94
95 xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
96
97 return rt;
98}
99
100/* Send RST reply */ 41/* Send RST reply */
101static void send_reset(struct sk_buff *oldskb, int hook) 42static void send_reset(struct sk_buff *oldskb, int hook)
102{ 43{
103 struct sk_buff *nskb; 44 struct sk_buff *nskb;
104 struct iphdr *iph = oldskb->nh.iph; 45 struct iphdr *iph = oldskb->nh.iph;
105 struct tcphdr _otcph, *oth, *tcph; 46 struct tcphdr _otcph, *oth, *tcph;
106 struct rtable *rt;
107 __be16 tmp_port; 47 __be16 tmp_port;
108 __be32 tmp_addr; 48 __be32 tmp_addr;
109 int needs_ack; 49 int needs_ack;
110 int hh_len; 50 unsigned int addr_type;
111 51
112 /* IP header checks: fragment. */ 52 /* IP header checks: fragment. */
113 if (oldskb->nh.iph->frag_off & htons(IP_OFFSET)) 53 if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
@@ -126,23 +66,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
126 if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP)) 66 if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP))
127 return; 67 return;
128 68
129 if ((rt = route_reverse(oldskb, oth, hook)) == NULL)
130 return;
131
132 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
133
134 /* We need a linear, writeable skb. We also need to expand 69 /* We need a linear, writeable skb. We also need to expand
135 headroom in case hh_len of incoming interface < hh_len of 70 headroom in case hh_len of incoming interface < hh_len of
136 outgoing interface */ 71 outgoing interface */
137 nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb), 72 nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb),
138 GFP_ATOMIC); 73 GFP_ATOMIC);
139 if (!nskb) { 74 if (!nskb)
140 dst_release(&rt->u.dst);
141 return; 75 return;
142 }
143
144 dst_release(nskb->dst);
145 nskb->dst = &rt->u.dst;
146 76
147 /* This packet will not be the same as the other: clear nf fields */ 77 /* This packet will not be the same as the other: clear nf fields */
148 nf_reset(nskb); 78 nf_reset(nskb);
@@ -184,6 +114,21 @@ static void send_reset(struct sk_buff *oldskb, int hook)
184 tcph->window = 0; 114 tcph->window = 0;
185 tcph->urg_ptr = 0; 115 tcph->urg_ptr = 0;
186 116
117 /* Set DF, id = 0 */
118 nskb->nh.iph->frag_off = htons(IP_DF);
119 nskb->nh.iph->id = 0;
120
121 addr_type = RTN_UNSPEC;
122 if (hook != NF_IP_FORWARD
123#ifdef CONFIG_BRIDGE_NETFILTER
124 || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
125#endif
126 )
127 addr_type = RTN_LOCAL;
128
129 if (ip_route_me_harder(&nskb, addr_type))
130 goto free_nskb;
131
187 /* Adjust TCP checksum */ 132 /* Adjust TCP checksum */
188 nskb->ip_summed = CHECKSUM_NONE; 133 nskb->ip_summed = CHECKSUM_NONE;
189 tcph->check = 0; 134 tcph->check = 0;
@@ -192,12 +137,8 @@ static void send_reset(struct sk_buff *oldskb, int hook)
192 nskb->nh.iph->daddr, 137 nskb->nh.iph->daddr,
193 csum_partial((char *)tcph, 138 csum_partial((char *)tcph,
194 sizeof(struct tcphdr), 0)); 139 sizeof(struct tcphdr), 0));
195 140 /* Adjust IP TTL */
196 /* Adjust IP TTL, DF */
197 nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); 141 nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
198 /* Set DF, id = 0 */
199 nskb->nh.iph->frag_off = htons(IP_DF);
200 nskb->nh.iph->id = 0;
201 142
202 /* Adjust IP checksum */ 143 /* Adjust IP checksum */
203 nskb->nh.iph->check = 0; 144 nskb->nh.iph->check = 0;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index e62ea2bb9c0a..b91f3582359b 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -157,7 +157,8 @@ ipt_local_hook(unsigned int hook,
157 || (*pskb)->nfmark != nfmark 157 || (*pskb)->nfmark != nfmark
158#endif 158#endif
159 || (*pskb)->nh.iph->tos != tos)) 159 || (*pskb)->nh.iph->tos != tos))
160 return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; 160 if (ip_route_me_harder(pskb, RTN_UNSPEC))
161 ret = NF_DROP;
161 162
162 return ret; 163 return ret;
163} 164}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3f884cea14ff..cf06accbe687 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2259,7 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2259 u32 pkts_acked = 0; 2259 u32 pkts_acked = 0;
2260 void (*rtt_sample)(struct sock *sk, u32 usrtt) 2260 void (*rtt_sample)(struct sock *sk, u32 usrtt)
2261 = icsk->icsk_ca_ops->rtt_sample; 2261 = icsk->icsk_ca_ops->rtt_sample;
2262 struct timeval tv; 2262 struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
2263 2263
2264 while ((skb = skb_peek(&sk->sk_write_queue)) && 2264 while ((skb = skb_peek(&sk->sk_write_queue)) &&
2265 skb != sk->sk_send_head) { 2265 skb != sk->sk_send_head) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6d6142f9c478..865d75214a9a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -675,6 +675,8 @@ do_append_data:
675 udp_flush_pending_frames(sk); 675 udp_flush_pending_frames(sk);
676 else if (!corkreq) 676 else if (!corkreq)
677 err = udp_push_pending_frames(sk, up); 677 err = udp_push_pending_frames(sk, up);
678 else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
679 up->pending = 0;
678 release_sock(sk); 680 release_sock(sk);
679 681
680out: 682out:
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
new file mode 100644
index 000000000000..89cf59ea7bbe
--- /dev/null
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -0,0 +1,139 @@
1/*
2 * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4.
3 *
4 * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
5 * Miika Komu <miika@iki.fi>
6 * Herbert Xu <herbert@gondor.apana.org.au>
7 * Abhinav Pathak <abhinav.pathak@hiit.fi>
8 * Jeff Ahrenholz <ahrenholz@gmail.com>
9 */
10
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/stringify.h>
16#include <net/dst.h>
17#include <net/ip.h>
18#include <net/xfrm.h>
19
20/* Add encapsulation header.
21 *
22 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
23 * The following fields in it shall be filled in by x->type->output:
24 * tot_len
25 * check
26 *
27 * On exit, skb->h will be set to the start of the payload to be processed
28 * by x->type->output and skb->nh will be set to the top IP header.
29 */
30static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
31{
/*
 * Build the outer ("top") IPv4 header for a BEET-mode packet in front of
 * the existing one.  If the original header carries IP options, they are
 * left in place behind a struct ip_beet_phdr pseudo header and the top
 * header is reduced to a plain option-less header.
 *
 * x:   state supplying header_len and the outer saddr/daddr.
 * skb: packet whose skb->nh.iph is the header to encapsulate.
 *
 * Always returns 0.
 */
32 struct iphdr *iph, *top_iph = NULL;
33 int hdrlen, optlen;
34
/* Remember the original header; skb->h is pointed at it for now. */
35 iph = skb->nh.iph;
36 skb->h.ipiph = iph;
37
/* optlen = bytes of IP options, i.e. header length beyond struct iphdr. */
38 hdrlen = 0;
39 optlen = iph->ihl * 4 - sizeof(*iph);
/* Extra room is reserved only when options are present. */
40 if (unlikely(optlen))
41 hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
42
/* Grow the packet at the front; the new start becomes the top header. */
43 skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen);
44 top_iph = skb->nh.iph;
/* ihl*4 - optlen is sizeof(*iph) by the definition of optlen above. */
45 hdrlen = iph->ihl * 4 - optlen;
46 skb->h.raw += hdrlen;
47
/* Copy only the fixed part of the old header; regions may overlap. */
48 memmove(top_iph, iph, hdrlen);
49 if (unlikely(optlen)) {
50 struct ip_beet_phdr *ph;
51
/*
 * NOTE(review): this check runs after optlen has already been used to
 * size the skb_push() and the memmove() above -- confirm whether it
 * should be performed earlier.
 */
52 BUG_ON(optlen < 0);
53
/*
 * Fill the pseudo header at skb->h.raw.  padlen is 4 when bit 2 of
 * optlen is clear and 0 when it is set; hdrlen is stored in units of
 * 8 bytes and covers options, padding and the pseudo header itself.
 */
54 ph = (struct ip_beet_phdr *)skb->h.raw;
55 ph->padlen = 4 - (optlen & 4);
56 ph->hdrlen = (optlen + ph->padlen + sizeof(*ph)) / 8;
57 ph->nexthdr = top_iph->protocol;
58
/* The top header now announces the pseudo header and has no options. */
59 top_iph->protocol = IPPROTO_BEETPH;
60 top_iph->ihl = sizeof(struct iphdr) / 4;
61 }
62
/* Outer addresses come from the xfrm state, not from the inner packet. */
63 top_iph->saddr = x->props.saddr.a4;
64 top_iph->daddr = x->id.daddr.a4;
65
66 return 0;
67}
68
69static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
70{
/*
 * Undo BEET-mode encapsulation: rebuild the inner IPv4 header in front of
 * the payload (and, when a BEET pseudo header is present, in front of the
 * IP options it carried), using the addresses from the state's selector.
 *
 * Returns 0 on success, or -EINVAL when the pseudo header cannot be
 * pulled or describes an invalid option length.
 */
71 struct iphdr *iph = skb->nh.iph;
72 int phlen = 0;
73 int optlen = 0;
74 __u8 ph_nexthdr = 0, protocol = 0;
75 int err = -EINVAL;
76
77 protocol = iph->protocol;
78
79 if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
/*
 * NOTE(review): ph is computed before the pskb_may_pull() calls below
 * and dereferenced after them.  If pskb_may_pull() can relocate the
 * packet data, ph would be stale -- confirm against the skb API.
 */
80 struct ip_beet_phdr *ph = (struct ip_beet_phdr*)(iph + 1);
81
82 if (!pskb_may_pull(skb, sizeof(*ph)))
83 goto out;
84
/*
 * hdrlen is in units of 8 bytes.  The option length is what remains
 * after removing the padding and the pseudo header itself; it must be
 * non-negative, a multiple of 4 and at most 250.
 */
85 phlen = ph->hdrlen * 8;
86 optlen = phlen - ph->padlen - sizeof(*ph);
87 if (optlen < 0 || optlen & 3 || optlen > 250)
88 goto out;
89
90 if (!pskb_may_pull(skb, phlen))
91 goto out;
92
93 ph_nexthdr = ph->nexthdr;
94 }
95
/*
 * Make room for a fixed header plus the options, minus the pseudo-header
 * area already in the packet, then copy the fixed part of the received
 * header to the new front and make it the network header.
 */
96 skb_push(skb, sizeof(*iph) - phlen + optlen);
97 memmove(skb->data, skb->nh.raw, sizeof(*iph));
98 skb->nh.raw = skb->data;
99
/* Rewrite length fields and restore the inner addresses from the selector. */
100 iph = skb->nh.iph;
101 iph->ihl = (sizeof(*iph) + optlen) / 4;
102 iph->tot_len = htons(skb->len);
103 iph->daddr = x->sel.daddr.a4;
104 iph->saddr = x->sel.saddr.a4;
/*
 * NOTE(review): a zero ph_nexthdr doubles as "no pseudo header seen", so
 * a pseudo header whose nexthdr is 0 falls through to the saved outer
 * protocol value -- confirm this is intended.
 */
105 if (ph_nexthdr)
106 iph->protocol = ph_nexthdr;
107 else
108 iph->protocol = protocol;
/* The checksum field must be zero while the header checksum is computed. */
109 iph->check = 0;
110 iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
111 err = 0;
112out:
113 return err;
114}
115
/*
 * Mode descriptor tying the BEET input/output handlers in this file to
 * XFRM_MODE_BEET; it is registered for AF_INET by xfrm4_beet_init().
 */
116static struct xfrm_mode xfrm4_beet_mode = {
117 .input = xfrm4_beet_input,
118 .output = xfrm4_beet_output,
119 .owner = THIS_MODULE,
120 .encap = XFRM_MODE_BEET,
121};
122
/*
 * Module entry point: register the BEET mode descriptor for AF_INET and
 * hand the result of xfrm_register_mode() back to the module loader.
 */
123static int __init xfrm4_beet_init(void)
124{
125 return xfrm_register_mode(&xfrm4_beet_mode, AF_INET);
126}
127
/*
 * Module exit point: unregister the BEET mode descriptor for AF_INET.
 * A non-zero result from xfrm_unregister_mode() trips BUG_ON().
 */
128static void __exit xfrm4_beet_exit(void)
129{
130 int err;
131
132 err = xfrm_unregister_mode(&xfrm4_beet_mode, AF_INET);
133 BUG_ON(err);
134}
135
136module_init(xfrm4_beet_init);
137module_exit(xfrm4_beet_exit);
138MODULE_LICENSE("GPL");
139MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index a2d211da2aba..a460e8132b4d 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -136,6 +136,16 @@ config INET6_XFRM_MODE_TUNNEL
136 136
137 If unsure, say Y. 137 If unsure, say Y.
138 138
139config INET6_XFRM_MODE_BEET
140 tristate "IPv6: IPsec BEET mode"
141 depends on IPV6
142 default IPV6
143 select XFRM
144 ---help---
145 Support for IPsec BEET mode.
146
147 If unsure, say Y.
148
139config INET6_XFRM_MODE_ROUTEOPTIMIZATION 149config INET6_XFRM_MODE_ROUTEOPTIMIZATION
140 tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)" 150 tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
141 depends on IPV6 && EXPERIMENTAL 151 depends on IPV6 && EXPERIMENTAL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 0213c6612b58..87274e47fe32 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
26obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o 26obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
27obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o 27obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
28obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o 28obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
29obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
29obj-$(CONFIG_NETFILTER) += netfilter/ 30obj-$(CONFIG_NETFILTER) += netfilter/
30 31
31obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o 32obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index a2860e35efd7..71f59f18ede8 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -199,6 +199,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
199static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) 199static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
200{ 200{
201 struct xfrm_state *t = NULL; 201 struct xfrm_state *t = NULL;
202 u8 mode = XFRM_MODE_TUNNEL;
202 203
203 t = xfrm_state_alloc(); 204 t = xfrm_state_alloc();
204 if (!t) 205 if (!t)
@@ -212,7 +213,9 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
212 memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); 213 memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
213 memcpy(&t->sel, &x->sel, sizeof(t->sel)); 214 memcpy(&t->sel, &x->sel, sizeof(t->sel));
214 t->props.family = AF_INET6; 215 t->props.family = AF_INET6;
215 t->props.mode = XFRM_MODE_TUNNEL; 216 if (x->props.mode == XFRM_MODE_BEET)
217 mode = x->props.mode;
218 t->props.mode = mode;
216 memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); 219 memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
217 220
218 if (xfrm_init_state(t)) 221 if (xfrm_init_state(t))
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9662561701d1..e0c3934a7e4b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -546,7 +546,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
546 struct in6_addr *daddr, *final_p = NULL, final; 546 struct in6_addr *daddr, *final_p = NULL, final;
547 struct ipv6_txoptions *opt = NULL; 547 struct ipv6_txoptions *opt = NULL;
548 struct ip6_flowlabel *flowlabel = NULL; 548 struct ip6_flowlabel *flowlabel = NULL;
549 struct flowi *fl = &inet->cork.fl; 549 struct flowi fl;
550 struct dst_entry *dst; 550 struct dst_entry *dst;
551 int addr_len = msg->msg_namelen; 551 int addr_len = msg->msg_namelen;
552 int ulen = len; 552 int ulen = len;
@@ -626,19 +626,19 @@ do_udp_sendmsg:
626 } 626 }
627 ulen += sizeof(struct udphdr); 627 ulen += sizeof(struct udphdr);
628 628
629 memset(fl, 0, sizeof(*fl)); 629 memset(&fl, 0, sizeof(fl));
630 630
631 if (sin6) { 631 if (sin6) {
632 if (sin6->sin6_port == 0) 632 if (sin6->sin6_port == 0)
633 return -EINVAL; 633 return -EINVAL;
634 634
635 fl->fl_ip_dport = sin6->sin6_port; 635 fl.fl_ip_dport = sin6->sin6_port;
636 daddr = &sin6->sin6_addr; 636 daddr = &sin6->sin6_addr;
637 637
638 if (np->sndflow) { 638 if (np->sndflow) {
639 fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; 639 fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
640 if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) { 640 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
641 flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel); 641 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
642 if (flowlabel == NULL) 642 if (flowlabel == NULL)
643 return -EINVAL; 643 return -EINVAL;
644 daddr = &flowlabel->dst; 644 daddr = &flowlabel->dst;
@@ -656,32 +656,32 @@ do_udp_sendmsg:
656 if (addr_len >= sizeof(struct sockaddr_in6) && 656 if (addr_len >= sizeof(struct sockaddr_in6) &&
657 sin6->sin6_scope_id && 657 sin6->sin6_scope_id &&
658 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) 658 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
659 fl->oif = sin6->sin6_scope_id; 659 fl.oif = sin6->sin6_scope_id;
660 } else { 660 } else {
661 if (sk->sk_state != TCP_ESTABLISHED) 661 if (sk->sk_state != TCP_ESTABLISHED)
662 return -EDESTADDRREQ; 662 return -EDESTADDRREQ;
663 663
664 fl->fl_ip_dport = inet->dport; 664 fl.fl_ip_dport = inet->dport;
665 daddr = &np->daddr; 665 daddr = &np->daddr;
666 fl->fl6_flowlabel = np->flow_label; 666 fl.fl6_flowlabel = np->flow_label;
667 connected = 1; 667 connected = 1;
668 } 668 }
669 669
670 if (!fl->oif) 670 if (!fl.oif)
671 fl->oif = sk->sk_bound_dev_if; 671 fl.oif = sk->sk_bound_dev_if;
672 672
673 if (msg->msg_controllen) { 673 if (msg->msg_controllen) {
674 opt = &opt_space; 674 opt = &opt_space;
675 memset(opt, 0, sizeof(struct ipv6_txoptions)); 675 memset(opt, 0, sizeof(struct ipv6_txoptions));
676 opt->tot_len = sizeof(*opt); 676 opt->tot_len = sizeof(*opt);
677 677
678 err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass); 678 err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
679 if (err < 0) { 679 if (err < 0) {
680 fl6_sock_release(flowlabel); 680 fl6_sock_release(flowlabel);
681 return err; 681 return err;
682 } 682 }
683 if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { 683 if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
684 flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel); 684 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
685 if (flowlabel == NULL) 685 if (flowlabel == NULL)
686 return -EINVAL; 686 return -EINVAL;
687 } 687 }
@@ -695,39 +695,39 @@ do_udp_sendmsg:
695 opt = fl6_merge_options(&opt_space, flowlabel, opt); 695 opt = fl6_merge_options(&opt_space, flowlabel, opt);
696 opt = ipv6_fixup_options(&opt_space, opt); 696 opt = ipv6_fixup_options(&opt_space, opt);
697 697
698 fl->proto = IPPROTO_UDP; 698 fl.proto = IPPROTO_UDP;
699 ipv6_addr_copy(&fl->fl6_dst, daddr); 699 ipv6_addr_copy(&fl.fl6_dst, daddr);
700 if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr)) 700 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
701 ipv6_addr_copy(&fl->fl6_src, &np->saddr); 701 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
702 fl->fl_ip_sport = inet->sport; 702 fl.fl_ip_sport = inet->sport;
703 703
704 /* merge ip6_build_xmit from ip6_output */ 704 /* merge ip6_build_xmit from ip6_output */
705 if (opt && opt->srcrt) { 705 if (opt && opt->srcrt) {
706 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; 706 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
707 ipv6_addr_copy(&final, &fl->fl6_dst); 707 ipv6_addr_copy(&final, &fl.fl6_dst);
708 ipv6_addr_copy(&fl->fl6_dst, rt0->addr); 708 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
709 final_p = &final; 709 final_p = &final;
710 connected = 0; 710 connected = 0;
711 } 711 }
712 712
713 if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) { 713 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
714 fl->oif = np->mcast_oif; 714 fl.oif = np->mcast_oif;
715 connected = 0; 715 connected = 0;
716 } 716 }
717 717
718 security_sk_classify_flow(sk, fl); 718 security_sk_classify_flow(sk, &fl);
719 719
720 err = ip6_sk_dst_lookup(sk, &dst, fl); 720 err = ip6_sk_dst_lookup(sk, &dst, &fl);
721 if (err) 721 if (err)
722 goto out; 722 goto out;
723 if (final_p) 723 if (final_p)
724 ipv6_addr_copy(&fl->fl6_dst, final_p); 724 ipv6_addr_copy(&fl.fl6_dst, final_p);
725 725
726 if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) 726 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
727 goto out; 727 goto out;
728 728
729 if (hlimit < 0) { 729 if (hlimit < 0) {
730 if (ipv6_addr_is_multicast(&fl->fl6_dst)) 730 if (ipv6_addr_is_multicast(&fl.fl6_dst))
731 hlimit = np->mcast_hops; 731 hlimit = np->mcast_hops;
732 else 732 else
733 hlimit = np->hop_limit; 733 hlimit = np->hop_limit;
@@ -763,21 +763,23 @@ back_from_confirm:
763do_append_data: 763do_append_data:
764 up->len += ulen; 764 up->len += ulen;
765 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, 765 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen,
766 sizeof(struct udphdr), hlimit, tclass, opt, fl, 766 sizeof(struct udphdr), hlimit, tclass, opt, &fl,
767 (struct rt6_info*)dst, 767 (struct rt6_info*)dst,
768 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); 768 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
769 if (err) 769 if (err)
770 udp_v6_flush_pending_frames(sk); 770 udp_v6_flush_pending_frames(sk);
771 else if (!corkreq) 771 else if (!corkreq)
772 err = udp_v6_push_pending_frames(sk, up); 772 err = udp_v6_push_pending_frames(sk, up);
773 else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
774 up->pending = 0;
773 775
774 if (dst) { 776 if (dst) {
775 if (connected) { 777 if (connected) {
776 ip6_dst_store(sk, dst, 778 ip6_dst_store(sk, dst,
777 ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? 779 ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
778 &np->daddr : NULL, 780 &np->daddr : NULL,
779#ifdef CONFIG_IPV6_SUBTREES 781#ifdef CONFIG_IPV6_SUBTREES
780 ipv6_addr_equal(&fl->fl6_src, &np->saddr) ? 782 ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
781 &np->saddr : 783 &np->saddr :
782#endif 784#endif
783 NULL); 785 NULL);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
new file mode 100644
index 000000000000..edcfffa9e87b
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -0,0 +1,107 @@
1/*
2 * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6.
3 *
4 * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
5 * Miika Komu <miika@iki.fi>
6 * Herbert Xu <herbert@gondor.apana.org.au>
7 * Abhinav Pathak <abhinav.pathak@hiit.fi>
8 * Jeff Ahrenholz <ahrenholz@gmail.com>
9 */
10
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/stringify.h>
16#include <net/dsfield.h>
17#include <net/dst.h>
18#include <net/inet_ecn.h>
19#include <net/ipv6.h>
20#include <net/xfrm.h>
21
22/* Add encapsulation header.
23 *
24 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
25 * The following fields in it shall be filled in by x->type->output:
26 * payload_len
27 *
28 * On exit, skb->h will be set to the start of the encapsulation header to be
29 * filled in by x->type->output and skb->nh will be set to the nextheader field
30 * of the extension header directly preceding the encapsulation header, or in
31 * its absence, that of the top IP header. The value of skb->data will always
32 * point to the top IP header.
33 */
34static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
35{
36 struct ipv6hdr *iph, *top_iph;
37 u8 *prevhdr;
38 int hdr_len;
39
40 skb_push(skb, x->props.header_len);
41 iph = skb->nh.ipv6h;
42
43 hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
44 skb->nh.raw = prevhdr - x->props.header_len;
45 skb->h.raw = skb->data + hdr_len;
46 memmove(skb->data, iph, hdr_len);
47
48 skb->nh.raw = skb->data;
49 top_iph = skb->nh.ipv6h;
50 skb->nh.raw = &top_iph->nexthdr;
51 skb->h.ipv6h = top_iph + 1;
52
53 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
54 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
55
56 return 0;
57}
58
59static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
60{
61 struct ipv6hdr *ip6h;
62 int size = sizeof(struct ipv6hdr);
63 int err = -EINVAL;
64
65 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
66 goto out;
67
68 skb_push(skb, size);
69 memmove(skb->data, skb->nh.raw, size);
70 skb->nh.raw = skb->data;
71
72 skb->mac.raw = memmove(skb->data - skb->mac_len,
73 skb->mac.raw, skb->mac_len);
74
75 ip6h = skb->nh.ipv6h;
76 ip6h->payload_len = htons(skb->len - size);
77 ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
78 ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
79 err = 0;
80out:
81 return err;
82}
83
84static struct xfrm_mode xfrm6_beet_mode = {
85 .input = xfrm6_beet_input,
86 .output = xfrm6_beet_output,
87 .owner = THIS_MODULE,
88 .encap = XFRM_MODE_BEET,
89};
90
91static int __init xfrm6_beet_init(void)
92{
93 return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6);
94}
95
96static void __exit xfrm6_beet_exit(void)
97{
98 int err;
99
100 err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6);
101 BUG_ON(err);
102}
103
104module_init(xfrm6_beet_init);
105module_exit(xfrm6_beet_exit);
106MODULE_LICENSE("GPL");
107MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0a28d2c5c44f..ce94732b8e23 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -365,7 +365,7 @@ config NETFILTER_XT_MATCH_MULTIPORT
365 365
366config NETFILTER_XT_MATCH_PHYSDEV 366config NETFILTER_XT_MATCH_PHYSDEV
367 tristate '"physdev" match support' 367 tristate '"physdev" match support'
368 depends on NETFILTER_XTABLES && BRIDGE_NETFILTER 368 depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER
369 help 369 help
370 Physdev packet matching matches against the physical bridge ports 370 Physdev packet matching matches against the physical bridge ports
371 the IP packet arrived on or will leave by. 371 the IP packet arrived on or will leave by.
diff --git a/net/sched/estimator.c b/net/sched/estimator.c
deleted file mode 100644
index 0ebc98e9be2d..000000000000
--- a/net/sched/estimator.c
+++ /dev/null
@@ -1,196 +0,0 @@
1/*
2 * net/sched/estimator.c Simple rate estimator.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */
11
12#include <asm/uaccess.h>
13#include <asm/system.h>
14#include <linux/bitops.h>
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/jiffies.h>
19#include <linux/string.h>
20#include <linux/mm.h>
21#include <linux/socket.h>
22#include <linux/sockios.h>
23#include <linux/in.h>
24#include <linux/errno.h>
25#include <linux/interrupt.h>
26#include <linux/netdevice.h>
27#include <linux/skbuff.h>
28#include <linux/rtnetlink.h>
29#include <linux/init.h>
30#include <net/sock.h>
31#include <net/pkt_sched.h>
32
33/*
34 This code is NOT intended to be used for statistics collection,
35 its purpose is to provide a base for statistical multiplexing
36 for controlled load service.
37 If you need only statistics, run a user level daemon which
38 periodically reads byte counters.
39
40 Unfortunately, rate estimation is not a very easy task.
41 F.e. I did not find a simple way to estimate the current peak rate
42 and even failed to formulate the problem 8)8)
43
44 So I preferred not to build an estimator into the scheduler,
45 but run this task separately.
46 Ideally, it should be kernel thread(s), but for now it runs
47 from timers, which puts apparent top bounds on the number of rated
48 flows, has minimal overhead on small, but is enough
49 to handle controlled load service, sets of aggregates.
50
51 We measure rate over A=(1<<interval) seconds and evaluate EWMA:
52
53 avrate = avrate*(1-W) + rate*W
54
55 where W is chosen as negative power of 2: W = 2^(-ewma_log)
56
57 The resulting time constant is:
58
59 T = A/(-ln(1-W))
60
61
62 NOTES.
63
64 * The stored value for avbps is scaled by 2^5, so that maximal
65 rate is ~1Gbit, avpps is scaled by 2^10.
66
67 * Minimal interval is HZ/4=250msec (it is the greatest common divisor
68 for HZ=100 and HZ=1024 8)), maximal interval
69 is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
70 are too expensive, longer ones can be implemented
71 at user level painlessly.
72 */
73
74#define EST_MAX_INTERVAL 5
75
76struct qdisc_estimator
77{
78 struct qdisc_estimator *next;
79 struct tc_stats *stats;
80 spinlock_t *stats_lock;
81 unsigned interval;
82 int ewma_log;
83 u64 last_bytes;
84 u32 last_packets;
85 u32 avpps;
86 u32 avbps;
87};
88
89struct qdisc_estimator_head
90{
91 struct timer_list timer;
92 struct qdisc_estimator *list;
93};
94
95static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1];
96
97/* Estimator array lock */
98static DEFINE_RWLOCK(est_lock);
99
100static void est_timer(unsigned long arg)
101{
102 int idx = (int)arg;
103 struct qdisc_estimator *e;
104
105 read_lock(&est_lock);
106 for (e = elist[idx].list; e; e = e->next) {
107 struct tc_stats *st = e->stats;
108 u64 nbytes;
109 u32 npackets;
110 u32 rate;
111
112 spin_lock(e->stats_lock);
113 nbytes = st->bytes;
114 npackets = st->packets;
115 rate = (nbytes - e->last_bytes)<<(7 - idx);
116 e->last_bytes = nbytes;
117 e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
118 st->bps = (e->avbps+0xF)>>5;
119
120 rate = (npackets - e->last_packets)<<(12 - idx);
121 e->last_packets = npackets;
122 e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
123 e->stats->pps = (e->avpps+0x1FF)>>10;
124 spin_unlock(e->stats_lock);
125 }
126
127 mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
128 read_unlock(&est_lock);
129}
130
131int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct rtattr *opt)
132{
133 struct qdisc_estimator *est;
134 struct tc_estimator *parm = RTA_DATA(opt);
135
136 if (RTA_PAYLOAD(opt) < sizeof(*parm))
137 return -EINVAL;
138
139 if (parm->interval < -2 || parm->interval > 3)
140 return -EINVAL;
141
142 est = kzalloc(sizeof(*est), GFP_KERNEL);
143 if (est == NULL)
144 return -ENOBUFS;
145
146 est->interval = parm->interval + 2;
147 est->stats = stats;
148 est->stats_lock = stats_lock;
149 est->ewma_log = parm->ewma_log;
150 est->last_bytes = stats->bytes;
151 est->avbps = stats->bps<<5;
152 est->last_packets = stats->packets;
153 est->avpps = stats->pps<<10;
154
155 est->next = elist[est->interval].list;
156 if (est->next == NULL) {
157 init_timer(&elist[est->interval].timer);
158 elist[est->interval].timer.data = est->interval;
159 elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
160 elist[est->interval].timer.function = est_timer;
161 add_timer(&elist[est->interval].timer);
162 }
163 write_lock_bh(&est_lock);
164 elist[est->interval].list = est;
165 write_unlock_bh(&est_lock);
166 return 0;
167}
168
169void qdisc_kill_estimator(struct tc_stats *stats)
170{
171 int idx;
172 struct qdisc_estimator *est, **pest;
173
174 for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
175 int killed = 0;
176 pest = &elist[idx].list;
177 while ((est=*pest) != NULL) {
178 if (est->stats != stats) {
179 pest = &est->next;
180 continue;
181 }
182
183 write_lock_bh(&est_lock);
184 *pest = est->next;
185 write_unlock_bh(&est_lock);
186
187 kfree(est);
188 killed++;
189 }
190 if (killed && elist[idx].list == NULL)
191 del_timer(&elist[idx].timer);
192 }
193}
194
195EXPORT_SYMBOL(qdisc_kill_estimator);
196EXPORT_SYMBOL(qdisc_new_estimator);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 6c058e3660c0..bb3ddd4784b1 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -391,7 +391,7 @@ static inline void htb_add_class_to_row(struct htb_sched *q,
391/* If this triggers, it is a bug in this code, but it need not be fatal */ 391/* If this triggers, it is a bug in this code, but it need not be fatal */
392static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root) 392static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
393{ 393{
394 if (!RB_EMPTY_NODE(rb)) { 394 if (RB_EMPTY_NODE(rb)) {
395 WARN_ON(1); 395 WARN_ON(1);
396 } else { 396 } else {
397 rb_erase(rb, root); 397 rb_erase(rb, root);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 638c0b576203..447d9aef4605 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -903,9 +903,9 @@ out_seq:
903struct gss_svc_data { 903struct gss_svc_data {
904 /* decoded gss client cred: */ 904 /* decoded gss client cred: */
905 struct rpc_gss_wire_cred clcred; 905 struct rpc_gss_wire_cred clcred;
906 /* pointer to the beginning of the procedure-specific results, 906 /* save a pointer to the beginning of the encoded verifier,
907 * which may be encrypted/checksummed in svcauth_gss_release: */ 907 * for use in encryption/checksumming in svcauth_gss_release: */
908 __be32 *body_start; 908 __be32 *verf_start;
909 struct rsc *rsci; 909 struct rsc *rsci;
910}; 910};
911 911
@@ -968,7 +968,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
968 if (!svcdata) 968 if (!svcdata)
969 goto auth_err; 969 goto auth_err;
970 rqstp->rq_auth_data = svcdata; 970 rqstp->rq_auth_data = svcdata;
971 svcdata->body_start = NULL; 971 svcdata->verf_start = NULL;
972 svcdata->rsci = NULL; 972 svcdata->rsci = NULL;
973 gc = &svcdata->clcred; 973 gc = &svcdata->clcred;
974 974
@@ -1097,6 +1097,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
1097 goto complete; 1097 goto complete;
1098 case RPC_GSS_PROC_DATA: 1098 case RPC_GSS_PROC_DATA:
1099 *authp = rpcsec_gsserr_ctxproblem; 1099 *authp = rpcsec_gsserr_ctxproblem;
1100 svcdata->verf_start = resv->iov_base + resv->iov_len;
1100 if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) 1101 if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
1101 goto auth_err; 1102 goto auth_err;
1102 rqstp->rq_cred = rsci->cred; 1103 rqstp->rq_cred = rsci->cred;
@@ -1110,7 +1111,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
1110 gc->gc_seq, rsci->mechctx)) 1111 gc->gc_seq, rsci->mechctx))
1111 goto auth_err; 1112 goto auth_err;
1112 /* placeholders for length and seq. number: */ 1113 /* placeholders for length and seq. number: */
1113 svcdata->body_start = resv->iov_base + resv->iov_len;
1114 svc_putnl(resv, 0); 1114 svc_putnl(resv, 0);
1115 svc_putnl(resv, 0); 1115 svc_putnl(resv, 0);
1116 break; 1116 break;
@@ -1119,7 +1119,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
1119 gc->gc_seq, rsci->mechctx)) 1119 gc->gc_seq, rsci->mechctx))
1120 goto auth_err; 1120 goto auth_err;
1121 /* placeholders for length and seq. number: */ 1121 /* placeholders for length and seq. number: */
1122 svcdata->body_start = resv->iov_base + resv->iov_len;
1123 svc_putnl(resv, 0); 1122 svc_putnl(resv, 0);
1124 svc_putnl(resv, 0); 1123 svc_putnl(resv, 0);
1125 break; 1124 break;
@@ -1147,6 +1146,32 @@ out:
1147 return ret; 1146 return ret;
1148} 1147}
1149 1148
1149u32 *
1150svcauth_gss_prepare_to_wrap(struct xdr_buf *resbuf, struct gss_svc_data *gsd)
1151{
1152 u32 *p, verf_len;
1153
1154 p = gsd->verf_start;
1155 gsd->verf_start = NULL;
1156
1157 /* If the reply stat is nonzero, don't wrap: */
1158 if (*(p-1) != rpc_success)
1159 return NULL;
1160 /* Skip the verifier: */
1161 p += 1;
1162 verf_len = ntohl(*p++);
1163 p += XDR_QUADLEN(verf_len);
1164 /* move accept_stat to right place: */
1165 memcpy(p, p + 2, 4);
1166 /* Also don't wrap if the accept stat is nonzero: */
1167 if (*p != rpc_success) {
1168 resbuf->head[0].iov_len -= 2 * 4;
1169 return NULL;
1170 }
1171 p++;
1172 return p;
1173}
1174
1150static inline int 1175static inline int
1151svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) 1176svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
1152{ 1177{
@@ -1160,17 +1185,9 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
1160 int integ_offset, integ_len; 1185 int integ_offset, integ_len;
1161 int stat = -EINVAL; 1186 int stat = -EINVAL;
1162 1187
1163 p = gsd->body_start; 1188 p = svcauth_gss_prepare_to_wrap(resbuf, gsd);
1164 gsd->body_start = NULL; 1189 if (p == NULL)
1165 /* move accept_stat to right place: */
1166 memcpy(p, p + 2, 4);
1167 /* Don't wrap in failure case: */
1168 /* Counting on not getting here if call was not even accepted! */
1169 if (*p != rpc_success) {
1170 resbuf->head[0].iov_len -= 2 * 4;
1171 goto out; 1190 goto out;
1172 }
1173 p++;
1174 integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; 1191 integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
1175 integ_len = resbuf->len - integ_offset; 1192 integ_len = resbuf->len - integ_offset;
1176 BUG_ON(integ_len % 4); 1193 BUG_ON(integ_len % 4);
@@ -1191,7 +1208,6 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
1191 resbuf->tail[0].iov_base = resbuf->head[0].iov_base 1208 resbuf->tail[0].iov_base = resbuf->head[0].iov_base
1192 + resbuf->head[0].iov_len; 1209 + resbuf->head[0].iov_len;
1193 resbuf->tail[0].iov_len = 0; 1210 resbuf->tail[0].iov_len = 0;
1194 rqstp->rq_restailpage = 0;
1195 resv = &resbuf->tail[0]; 1211 resv = &resbuf->tail[0];
1196 } else { 1212 } else {
1197 resv = &resbuf->tail[0]; 1213 resv = &resbuf->tail[0];
@@ -1223,24 +1239,16 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
1223 int offset; 1239 int offset;
1224 int pad; 1240 int pad;
1225 1241
1226 p = gsd->body_start; 1242 p = svcauth_gss_prepare_to_wrap(resbuf, gsd);
1227 gsd->body_start = NULL; 1243 if (p == NULL)
1228 /* move accept_stat to right place: */
1229 memcpy(p, p + 2, 4);
1230 /* Don't wrap in failure case: */
1231 /* Counting on not getting here if call was not even accepted! */
1232 if (*p != rpc_success) {
1233 resbuf->head[0].iov_len -= 2 * 4;
1234 return 0; 1244 return 0;
1235 }
1236 p++;
1237 len = p++; 1245 len = p++;
1238 offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base; 1246 offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base;
1239 *p++ = htonl(gc->gc_seq); 1247 *p++ = htonl(gc->gc_seq);
1240 inpages = resbuf->pages; 1248 inpages = resbuf->pages;
1241 /* XXX: Would be better to write some xdr helper functions for 1249 /* XXX: Would be better to write some xdr helper functions for
1242 * nfs{2,3,4}xdr.c that place the data right, instead of copying: */ 1250 * nfs{2,3,4}xdr.c that place the data right, instead of copying: */
1243 if (resbuf->tail[0].iov_base && rqstp->rq_restailpage == 0) { 1251 if (resbuf->tail[0].iov_base) {
1244 BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base 1252 BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base
1245 + PAGE_SIZE); 1253 + PAGE_SIZE);
1246 BUG_ON(resbuf->tail[0].iov_base < resbuf->head[0].iov_base); 1254 BUG_ON(resbuf->tail[0].iov_base < resbuf->head[0].iov_base);
@@ -1258,7 +1266,6 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
1258 resbuf->tail[0].iov_base = resbuf->head[0].iov_base 1266 resbuf->tail[0].iov_base = resbuf->head[0].iov_base
1259 + resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE; 1267 + resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE;
1260 resbuf->tail[0].iov_len = 0; 1268 resbuf->tail[0].iov_len = 0;
1261 rqstp->rq_restailpage = 0;
1262 } 1269 }
1263 if (gss_wrap(gsd->rsci->mechctx, offset, resbuf, inpages)) 1270 if (gss_wrap(gsd->rsci->mechctx, offset, resbuf, inpages))
1264 return -ENOMEM; 1271 return -ENOMEM;
@@ -1282,7 +1289,7 @@ svcauth_gss_release(struct svc_rqst *rqstp)
1282 if (gc->gc_proc != RPC_GSS_PROC_DATA) 1289 if (gc->gc_proc != RPC_GSS_PROC_DATA)
1283 goto out; 1290 goto out;
1284 /* Release can be called twice, but we only wrap once. */ 1291 /* Release can be called twice, but we only wrap once. */
1285 if (gsd->body_start == NULL) 1292 if (gsd->verf_start == NULL)
1286 goto out; 1293 goto out;
1287 /* normally not set till svc_send, but we need it here: */ 1294 /* normally not set till svc_send, but we need it here: */
1288 /* XXX: what for? Do we mess it up the moment we call svc_putu32 1295 /* XXX: what for? Do we mess it up the moment we call svc_putu32
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a99e67b164c1..c2c8bb20d07f 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -417,18 +417,15 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
417 if (size > RPCSVC_MAXPAYLOAD) 417 if (size > RPCSVC_MAXPAYLOAD)
418 size = RPCSVC_MAXPAYLOAD; 418 size = RPCSVC_MAXPAYLOAD;
419 pages = 2 + (size+ PAGE_SIZE -1) / PAGE_SIZE; 419 pages = 2 + (size+ PAGE_SIZE -1) / PAGE_SIZE;
420 rqstp->rq_argused = 0;
421 rqstp->rq_resused = 0;
422 arghi = 0; 420 arghi = 0;
423 BUG_ON(pages > RPCSVC_MAXPAGES); 421 BUG_ON(pages > RPCSVC_MAXPAGES);
424 while (pages) { 422 while (pages) {
425 struct page *p = alloc_page(GFP_KERNEL); 423 struct page *p = alloc_page(GFP_KERNEL);
426 if (!p) 424 if (!p)
427 break; 425 break;
428 rqstp->rq_argpages[arghi++] = p; 426 rqstp->rq_pages[arghi++] = p;
429 pages--; 427 pages--;
430 } 428 }
431 rqstp->rq_arghi = arghi;
432 return ! pages; 429 return ! pages;
433} 430}
434 431
@@ -438,14 +435,10 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
438static void 435static void
439svc_release_buffer(struct svc_rqst *rqstp) 436svc_release_buffer(struct svc_rqst *rqstp)
440{ 437{
441 while (rqstp->rq_arghi) 438 int i;
442 put_page(rqstp->rq_argpages[--rqstp->rq_arghi]); 439 for (i=0; i<ARRAY_SIZE(rqstp->rq_pages); i++)
443 while (rqstp->rq_resused) { 440 if (rqstp->rq_pages[i])
444 if (rqstp->rq_respages[--rqstp->rq_resused] == NULL) 441 put_page(rqstp->rq_pages[i]);
445 continue;
446 put_page(rqstp->rq_respages[rqstp->rq_resused]);
447 }
448 rqstp->rq_argused = 0;
449} 442}
450 443
451/* 444/*
@@ -651,23 +644,32 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
651 unsigned long flags; 644 unsigned long flags;
652 int i, error = 0, dummy; 645 int i, error = 0, dummy;
653 646
654 progp = serv->sv_program;
655
656 dprintk("RPC: svc_register(%s, %s, %d)\n",
657 progp->pg_name, proto == IPPROTO_UDP? "udp" : "tcp", port);
658
659 if (!port) 647 if (!port)
660 clear_thread_flag(TIF_SIGPENDING); 648 clear_thread_flag(TIF_SIGPENDING);
661 649
662 for (i = 0; i < progp->pg_nvers; i++) { 650 for (progp = serv->sv_program; progp; progp = progp->pg_next) {
663 if (progp->pg_vers[i] == NULL) 651 for (i = 0; i < progp->pg_nvers; i++) {
664 continue; 652 if (progp->pg_vers[i] == NULL)
665 error = rpc_register(progp->pg_prog, i, proto, port, &dummy); 653 continue;
666 if (error < 0) 654
667 break; 655 dprintk("RPC: svc_register(%s, %s, %d, %d)%s\n",
668 if (port && !dummy) { 656 progp->pg_name,
669 error = -EACCES; 657 proto == IPPROTO_UDP? "udp" : "tcp",
670 break; 658 port,
659 i,
660 progp->pg_vers[i]->vs_hidden?
661 " (but not telling portmap)" : "");
662
663 if (progp->pg_vers[i]->vs_hidden)
664 continue;
665
666 error = rpc_register(progp->pg_prog, i, proto, port, &dummy);
667 if (error < 0)
668 break;
669 if (port && !dummy) {
670 error = -EACCES;
671 break;
672 }
671 } 673 }
672 } 674 }
673 675
@@ -697,7 +699,7 @@ svc_process(struct svc_rqst *rqstp)
697 u32 dir, prog, vers, proc; 699 u32 dir, prog, vers, proc;
698 __be32 auth_stat, rpc_stat; 700 __be32 auth_stat, rpc_stat;
699 int auth_res; 701 int auth_res;
700 __be32 *accept_statp; 702 __be32 *reply_statp;
701 703
702 rpc_stat = rpc_success; 704 rpc_stat = rpc_success;
703 705
@@ -707,10 +709,10 @@ svc_process(struct svc_rqst *rqstp)
707 /* setup response xdr_buf. 709 /* setup response xdr_buf.
708 * Initially it has just one page 710 * Initially it has just one page
709 */ 711 */
710 svc_take_page(rqstp); /* must succeed */ 712 rqstp->rq_resused = 1;
711 resv->iov_base = page_address(rqstp->rq_respages[0]); 713 resv->iov_base = page_address(rqstp->rq_respages[0]);
712 resv->iov_len = 0; 714 resv->iov_len = 0;
713 rqstp->rq_res.pages = rqstp->rq_respages+1; 715 rqstp->rq_res.pages = rqstp->rq_respages + 1;
714 rqstp->rq_res.len = 0; 716 rqstp->rq_res.len = 0;
715 rqstp->rq_res.page_base = 0; 717 rqstp->rq_res.page_base = 0;
716 rqstp->rq_res.page_len = 0; 718 rqstp->rq_res.page_len = 0;
@@ -738,7 +740,7 @@ svc_process(struct svc_rqst *rqstp)
738 goto err_bad_rpc; 740 goto err_bad_rpc;
739 741
740 /* Save position in case we later decide to reject: */ 742 /* Save position in case we later decide to reject: */
741 accept_statp = resv->iov_base + resv->iov_len; 743 reply_statp = resv->iov_base + resv->iov_len;
742 744
743 svc_putnl(resv, 0); /* ACCEPT */ 745 svc_putnl(resv, 0); /* ACCEPT */
744 746
@@ -886,7 +888,7 @@ err_bad_auth:
886 dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat)); 888 dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
887 serv->sv_stats->rpcbadauth++; 889 serv->sv_stats->rpcbadauth++;
888 /* Restore write pointer to location of accept status: */ 890 /* Restore write pointer to location of accept status: */
889 xdr_ressize_check(rqstp, accept_statp); 891 xdr_ressize_check(rqstp, reply_statp);
890 svc_putnl(resv, 1); /* REJECT */ 892 svc_putnl(resv, 1); /* REJECT */
891 svc_putnl(resv, 1); /* AUTH_ERROR */ 893 svc_putnl(resv, 1); /* AUTH_ERROR */
892 svc_putnl(resv, ntohl(auth_stat)); /* status */ 894 svc_putnl(resv, ntohl(auth_stat)); /* status */
@@ -926,3 +928,18 @@ err_bad:
926 svc_putnl(resv, ntohl(rpc_stat)); 928 svc_putnl(resv, ntohl(rpc_stat));
927 goto sendit; 929 goto sendit;
928} 930}
931
932/*
933 * Return (transport-specific) limit on the rpc payload.
934 */
935u32 svc_max_payload(const struct svc_rqst *rqstp)
936{
937 int max = RPCSVC_MAXPAYLOAD_TCP;
938
939 if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM)
940 max = RPCSVC_MAXPAYLOAD_UDP;
941 if (rqstp->rq_server->sv_bufsz < max)
942 max = rqstp->rq_server->sv_bufsz;
943 return max;
944}
945EXPORT_SYMBOL_GPL(svc_max_payload);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 40d41a2831d7..e1bd933629fe 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -9,6 +9,7 @@
9#include <linux/seq_file.h> 9#include <linux/seq_file.h>
10#include <linux/hash.h> 10#include <linux/hash.h>
11#include <linux/string.h> 11#include <linux/string.h>
12#include <net/sock.h>
12 13
13#define RPCDBG_FACILITY RPCDBG_AUTH 14#define RPCDBG_FACILITY RPCDBG_AUTH
14 15
@@ -375,6 +376,44 @@ void svcauth_unix_purge(void)
375 cache_purge(&ip_map_cache); 376 cache_purge(&ip_map_cache);
376} 377}
377 378
379static inline struct ip_map *
380ip_map_cached_get(struct svc_rqst *rqstp)
381{
382 struct ip_map *ipm = rqstp->rq_sock->sk_info_authunix;
383 if (ipm != NULL) {
384 if (!cache_valid(&ipm->h)) {
385 /*
386 * The entry has been invalidated since it was
387 * remembered, e.g. by a second mount from the
388 * same IP address.
389 */
390 rqstp->rq_sock->sk_info_authunix = NULL;
391 cache_put(&ipm->h, &ip_map_cache);
392 return NULL;
393 }
394 cache_get(&ipm->h);
395 }
396 return ipm;
397}
398
399static inline void
400ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
401{
402 struct svc_sock *svsk = rqstp->rq_sock;
403
404 if (svsk->sk_sock->type == SOCK_STREAM && svsk->sk_info_authunix == NULL)
405 svsk->sk_info_authunix = ipm; /* newly cached, keep the reference */
406 else
407 cache_put(&ipm->h, &ip_map_cache);
408}
409
410void
411svcauth_unix_info_release(void *info)
412{
413 struct ip_map *ipm = info;
414 cache_put(&ipm->h, &ip_map_cache);
415}
416
378static int 417static int
379svcauth_unix_set_client(struct svc_rqst *rqstp) 418svcauth_unix_set_client(struct svc_rqst *rqstp)
380{ 419{
@@ -384,8 +423,10 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
384 if (rqstp->rq_proc == 0) 423 if (rqstp->rq_proc == 0)
385 return SVC_OK; 424 return SVC_OK;
386 425
387 ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, 426 ipm = ip_map_cached_get(rqstp);
388 rqstp->rq_addr.sin_addr); 427 if (ipm == NULL)
428 ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class,
429 rqstp->rq_addr.sin_addr);
389 430
390 if (ipm == NULL) 431 if (ipm == NULL)
391 return SVC_DENIED; 432 return SVC_DENIED;
@@ -400,7 +441,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
400 case 0: 441 case 0:
401 rqstp->rq_client = &ipm->m_client->h; 442 rqstp->rq_client = &ipm->m_client->h;
402 kref_get(&rqstp->rq_client->ref); 443 kref_get(&rqstp->rq_client->ref);
403 cache_put(&ipm->h, &ip_map_cache); 444 ip_map_cached_put(rqstp, ipm);
404 break; 445 break;
405 } 446 }
406 return SVC_OK; 447 return SVC_OK;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index cba85d195222..b39e7e2b648f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -313,7 +313,7 @@ svc_sock_release(struct svc_rqst *rqstp)
313 313
314 svc_release_skb(rqstp); 314 svc_release_skb(rqstp);
315 315
316 svc_free_allpages(rqstp); 316 svc_free_res_pages(rqstp);
317 rqstp->rq_res.page_len = 0; 317 rqstp->rq_res.page_len = 0;
318 rqstp->rq_res.page_base = 0; 318 rqstp->rq_res.page_base = 0;
319 319
@@ -412,7 +412,8 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
412 /* send head */ 412 /* send head */
413 if (slen == xdr->head[0].iov_len) 413 if (slen == xdr->head[0].iov_len)
414 flags = 0; 414 flags = 0;
415 len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); 415 len = kernel_sendpage(sock, rqstp->rq_respages[0], 0,
416 xdr->head[0].iov_len, flags);
416 if (len != xdr->head[0].iov_len) 417 if (len != xdr->head[0].iov_len)
417 goto out; 418 goto out;
418 slen -= xdr->head[0].iov_len; 419 slen -= xdr->head[0].iov_len;
@@ -437,8 +438,9 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
437 } 438 }
438 /* send tail */ 439 /* send tail */
439 if (xdr->tail[0].iov_len) { 440 if (xdr->tail[0].iov_len) {
440 result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], 441 result = kernel_sendpage(sock, rqstp->rq_respages[0],
441 ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1), 442 ((unsigned long)xdr->tail[0].iov_base)
443 & (PAGE_SIZE-1),
442 xdr->tail[0].iov_len, 0); 444 xdr->tail[0].iov_len, 0);
443 445
444 if (result > 0) 446 if (result > 0)
@@ -492,7 +494,12 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
492 } 494 }
493 spin_unlock(&serv->sv_lock); 495 spin_unlock(&serv->sv_lock);
494 if (closesk) 496 if (closesk)
497 /* Should unregister with portmap, but you cannot
498 * unregister just one protocol...
499 */
495 svc_delete_socket(closesk); 500 svc_delete_socket(closesk);
501 else if (toclose)
502 return -ENOENT;
496 return len; 503 return len;
497} 504}
498EXPORT_SYMBOL(svc_sock_names); 505EXPORT_SYMBOL(svc_sock_names);
@@ -703,9 +710,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
703 if (len <= rqstp->rq_arg.head[0].iov_len) { 710 if (len <= rqstp->rq_arg.head[0].iov_len) {
704 rqstp->rq_arg.head[0].iov_len = len; 711 rqstp->rq_arg.head[0].iov_len = len;
705 rqstp->rq_arg.page_len = 0; 712 rqstp->rq_arg.page_len = 0;
713 rqstp->rq_respages = rqstp->rq_pages+1;
706 } else { 714 } else {
707 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 715 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
708 rqstp->rq_argused += (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE; 716 rqstp->rq_respages = rqstp->rq_pages + 1 +
717 (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE;
709 } 718 }
710 719
711 if (serv->sv_stats) 720 if (serv->sv_stats)
@@ -946,7 +955,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
946 struct svc_sock *svsk = rqstp->rq_sock; 955 struct svc_sock *svsk = rqstp->rq_sock;
947 struct svc_serv *serv = svsk->sk_server; 956 struct svc_serv *serv = svsk->sk_server;
948 int len; 957 int len;
949 struct kvec vec[RPCSVC_MAXPAGES]; 958 struct kvec *vec;
950 int pnum, vlen; 959 int pnum, vlen;
951 960
952 dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 961 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
@@ -1044,15 +1053,17 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1044 len = svsk->sk_reclen; 1053 len = svsk->sk_reclen;
1045 set_bit(SK_DATA, &svsk->sk_flags); 1054 set_bit(SK_DATA, &svsk->sk_flags);
1046 1055
1056 vec = rqstp->rq_vec;
1047 vec[0] = rqstp->rq_arg.head[0]; 1057 vec[0] = rqstp->rq_arg.head[0];
1048 vlen = PAGE_SIZE; 1058 vlen = PAGE_SIZE;
1049 pnum = 1; 1059 pnum = 1;
1050 while (vlen < len) { 1060 while (vlen < len) {
1051 vec[pnum].iov_base = page_address(rqstp->rq_argpages[rqstp->rq_argused++]); 1061 vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]);
1052 vec[pnum].iov_len = PAGE_SIZE; 1062 vec[pnum].iov_len = PAGE_SIZE;
1053 pnum++; 1063 pnum++;
1054 vlen += PAGE_SIZE; 1064 vlen += PAGE_SIZE;
1055 } 1065 }
1066 rqstp->rq_respages = &rqstp->rq_pages[pnum];
1056 1067
1057 /* Now receive data */ 1068 /* Now receive data */
1058 len = svc_recvfrom(rqstp, vec, pnum, len); 1069 len = svc_recvfrom(rqstp, vec, pnum, len);
@@ -1204,7 +1215,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
1204 struct svc_sock *svsk =NULL; 1215 struct svc_sock *svsk =NULL;
1205 struct svc_serv *serv = rqstp->rq_server; 1216 struct svc_serv *serv = rqstp->rq_server;
1206 struct svc_pool *pool = rqstp->rq_pool; 1217 struct svc_pool *pool = rqstp->rq_pool;
1207 int len; 1218 int len, i;
1208 int pages; 1219 int pages;
1209 struct xdr_buf *arg; 1220 struct xdr_buf *arg;
1210 DECLARE_WAITQUEUE(wait, current); 1221 DECLARE_WAITQUEUE(wait, current);
@@ -1221,27 +1232,22 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
1221 "svc_recv: service %p, wait queue active!\n", 1232 "svc_recv: service %p, wait queue active!\n",
1222 rqstp); 1233 rqstp);
1223 1234
1224 /* Initialize the buffers */
1225 /* first reclaim pages that were moved to response list */
1226 svc_pushback_allpages(rqstp);
1227 1235
1228 /* now allocate needed pages. If we get a failure, sleep briefly */ 1236 /* now allocate needed pages. If we get a failure, sleep briefly */
1229 pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE; 1237 pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE;
1230 while (rqstp->rq_arghi < pages) { 1238 for (i=0; i < pages ; i++)
1231 struct page *p = alloc_page(GFP_KERNEL); 1239 while (rqstp->rq_pages[i] == NULL) {
1232 if (!p) { 1240 struct page *p = alloc_page(GFP_KERNEL);
1233 schedule_timeout_uninterruptible(msecs_to_jiffies(500)); 1241 if (!p)
1234 continue; 1242 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
1243 rqstp->rq_pages[i] = p;
1235 } 1244 }
1236 rqstp->rq_argpages[rqstp->rq_arghi++] = p;
1237 }
1238 1245
1239 /* Make arg->head point to first page and arg->pages point to rest */ 1246 /* Make arg->head point to first page and arg->pages point to rest */
1240 arg = &rqstp->rq_arg; 1247 arg = &rqstp->rq_arg;
1241 arg->head[0].iov_base = page_address(rqstp->rq_argpages[0]); 1248 arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
1242 arg->head[0].iov_len = PAGE_SIZE; 1249 arg->head[0].iov_len = PAGE_SIZE;
1243 rqstp->rq_argused = 1; 1250 arg->pages = rqstp->rq_pages + 1;
1244 arg->pages = rqstp->rq_argpages + 1;
1245 arg->page_base = 0; 1251 arg->page_base = 0;
1246 /* save at least one page for response */ 1252 /* save at least one page for response */
1247 arg->page_len = (pages-2)*PAGE_SIZE; 1253 arg->page_len = (pages-2)*PAGE_SIZE;
@@ -1604,6 +1610,8 @@ svc_delete_socket(struct svc_sock *svsk)
1604 sockfd_put(svsk->sk_sock); 1610 sockfd_put(svsk->sk_sock);
1605 else 1611 else
1606 sock_release(svsk->sk_sock); 1612 sock_release(svsk->sk_sock);
1613 if (svsk->sk_info_authunix != NULL)
1614 svcauth_unix_info_release(svsk->sk_info_authunix);
1607 kfree(svsk); 1615 kfree(svsk);
1608 } else { 1616 } else {
1609 spin_unlock_bh(&serv->sv_lock); 1617 spin_unlock_bh(&serv->sv_lock);
@@ -1699,6 +1707,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
1699 rqstp->rq_prot = dr->prot; 1707 rqstp->rq_prot = dr->prot;
1700 rqstp->rq_addr = dr->addr; 1708 rqstp->rq_addr = dr->addr;
1701 rqstp->rq_daddr = dr->daddr; 1709 rqstp->rq_daddr = dr->daddr;
1710 rqstp->rq_respages = rqstp->rq_pages;
1702 return dr->argslen<<2; 1711 return dr->argslen<<2;
1703} 1712}
1704 1713
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 693f02eca6d6..53bc8cb5adbc 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1666,8 +1666,9 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
1666 char addr_string[16]; 1666 char addr_string[16];
1667 1667
1668 tipc_printf(TIPC_OUTPUT, "Msg seq number: %u, ", msg_seqno(msg)); 1668 tipc_printf(TIPC_OUTPUT, "Msg seq number: %u, ", msg_seqno(msg));
1669 tipc_printf(TIPC_OUTPUT, "Outstanding acks: %u\n", (u32)TIPC_SKB_CB(buf)->handle); 1669 tipc_printf(TIPC_OUTPUT, "Outstanding acks: %lu\n",
1670 1670 (unsigned long) TIPC_SKB_CB(buf)->handle);
1671
1671 n_ptr = l_ptr->owner->next; 1672 n_ptr = l_ptr->owner->next;
1672 tipc_node_lock(n_ptr); 1673 tipc_node_lock(n_ptr);
1673 1674
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 6ac4e4f033ac..d401dc8f05ed 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -41,17 +41,18 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t
41 return (h ^ (h >> 16)) & hmask; 41 return (h ^ (h >> 16)) & hmask;
42} 42}
43 43
44static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr, 44static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr,
45 xfrm_address_t *saddr,
45 unsigned short family, 46 unsigned short family,
46 unsigned int hmask) 47 unsigned int hmask)
47{ 48{
48 unsigned int h = family; 49 unsigned int h = family;
49 switch (family) { 50 switch (family) {
50 case AF_INET: 51 case AF_INET:
51 h ^= __xfrm4_addr_hash(saddr); 52 h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
52 break; 53 break;
53 case AF_INET6: 54 case AF_INET6:
54 h ^= __xfrm6_addr_hash(saddr); 55 h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
55 break; 56 break;
56 }; 57 };
57 return (h ^ (h >> 16)) & hmask; 58 return (h ^ (h >> 16)) & hmask;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b6e2e79d7261..2a7861661f14 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -778,8 +778,9 @@ void xfrm_policy_flush(u8 type)
778 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { 778 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
779 struct xfrm_policy *pol; 779 struct xfrm_policy *pol;
780 struct hlist_node *entry; 780 struct hlist_node *entry;
781 int i; 781 int i, killed;
782 782
783 killed = 0;
783 again1: 784 again1:
784 hlist_for_each_entry(pol, entry, 785 hlist_for_each_entry(pol, entry,
785 &xfrm_policy_inexact[dir], bydst) { 786 &xfrm_policy_inexact[dir], bydst) {
@@ -790,6 +791,7 @@ void xfrm_policy_flush(u8 type)
790 write_unlock_bh(&xfrm_policy_lock); 791 write_unlock_bh(&xfrm_policy_lock);
791 792
792 xfrm_policy_kill(pol); 793 xfrm_policy_kill(pol);
794 killed++;
793 795
794 write_lock_bh(&xfrm_policy_lock); 796 write_lock_bh(&xfrm_policy_lock);
795 goto again1; 797 goto again1;
@@ -807,13 +809,14 @@ void xfrm_policy_flush(u8 type)
807 write_unlock_bh(&xfrm_policy_lock); 809 write_unlock_bh(&xfrm_policy_lock);
808 810
809 xfrm_policy_kill(pol); 811 xfrm_policy_kill(pol);
812 killed++;
810 813
811 write_lock_bh(&xfrm_policy_lock); 814 write_lock_bh(&xfrm_policy_lock);
812 goto again2; 815 goto again2;
813 } 816 }
814 } 817 }
815 818
816 xfrm_policy_count[dir] = 0; 819 xfrm_policy_count[dir] -= killed;
817 } 820 }
818 atomic_inc(&flow_cache_genid); 821 atomic_inc(&flow_cache_genid);
819 write_unlock_bh(&xfrm_policy_lock); 822 write_unlock_bh(&xfrm_policy_lock);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f927b7330f02..39b8bf3a9ded 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -63,10 +63,11 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
63 return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); 63 return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
64} 64}
65 65
66static inline unsigned int xfrm_src_hash(xfrm_address_t *addr, 66static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
67 xfrm_address_t *saddr,
67 unsigned short family) 68 unsigned short family)
68{ 69{
69 return __xfrm_src_hash(addr, family, xfrm_state_hmask); 70 return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
70} 71}
71 72
72static inline unsigned int 73static inline unsigned int
@@ -92,7 +93,8 @@ static void xfrm_hash_transfer(struct hlist_head *list,
92 nhashmask); 93 nhashmask);
93 hlist_add_head(&x->bydst, ndsttable+h); 94 hlist_add_head(&x->bydst, ndsttable+h);
94 95
95 h = __xfrm_src_hash(&x->props.saddr, x->props.family, 96 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
97 x->props.family,
96 nhashmask); 98 nhashmask);
97 hlist_add_head(&x->bysrc, nsrctable+h); 99 hlist_add_head(&x->bysrc, nsrctable+h);
98 100
@@ -458,7 +460,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
458 460
459static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) 461static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
460{ 462{
461 unsigned int h = xfrm_src_hash(saddr, family); 463 unsigned int h = xfrm_src_hash(daddr, saddr, family);
462 struct xfrm_state *x; 464 struct xfrm_state *x;
463 struct hlist_node *entry; 465 struct hlist_node *entry;
464 466
@@ -587,7 +589,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
587 if (km_query(x, tmpl, pol) == 0) { 589 if (km_query(x, tmpl, pol) == 0) {
588 x->km.state = XFRM_STATE_ACQ; 590 x->km.state = XFRM_STATE_ACQ;
589 hlist_add_head(&x->bydst, xfrm_state_bydst+h); 591 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
590 h = xfrm_src_hash(saddr, family); 592 h = xfrm_src_hash(daddr, saddr, family);
591 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); 593 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
592 if (x->id.spi) { 594 if (x->id.spi) {
593 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); 595 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
@@ -622,7 +624,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
622 x->props.reqid, x->props.family); 624 x->props.reqid, x->props.family);
623 hlist_add_head(&x->bydst, xfrm_state_bydst+h); 625 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
624 626
625 h = xfrm_src_hash(&x->props.saddr, x->props.family); 627 h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
626 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); 628 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
627 629
628 if (x->id.spi) { 630 if (x->id.spi) {
@@ -748,7 +750,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
748 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; 750 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
749 add_timer(&x->timer); 751 add_timer(&x->timer);
750 hlist_add_head(&x->bydst, xfrm_state_bydst+h); 752 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
751 h = xfrm_src_hash(saddr, family); 753 h = xfrm_src_hash(daddr, saddr, family);
752 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); 754 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
753 wake_up(&km_waitq); 755 wake_up(&km_waitq);
754 } 756 }
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index c59a78d2923a..d54b3a70d5df 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -211,6 +211,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
211 case XFRM_MODE_TRANSPORT: 211 case XFRM_MODE_TRANSPORT:
212 case XFRM_MODE_TUNNEL: 212 case XFRM_MODE_TUNNEL:
213 case XFRM_MODE_ROUTEOPTIMIZATION: 213 case XFRM_MODE_ROUTEOPTIMIZATION:
214 case XFRM_MODE_BEET:
214 break; 215 break;
215 216
216 default: 217 default: