Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/inet_fragment.c      89
-rw-r--r--  net/ipv4/ip_fragment.c       159
-rw-r--r--  net/ipv4/tcp_input.c           6
-rw-r--r--  net/ipv4/xfrm4_input.c        40
-rw-r--r--  net/ipv4/xfrm4_mode_beet.c     1
-rw-r--r--  net/ipv4/xfrm4_mode_tunnel.c   1
-rw-r--r--  net/ipv4/xfrm4_output.c        2
-rw-r--r--  net/ipv4/xfrm4_policy.c       27
-rw-r--r--  net/ipv4/xfrm4_state.c         1
-rw-r--r--  net/ipv4/xfrm4_tunnel.c       11
10 files changed, 175 insertions(+), 162 deletions(-)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 484cf512858f..e15e04fc6661 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -136,7 +136,9 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 		*work -= f->qsize;
 	atomic_sub(f->qsize, &f->mem);
 
-	f->destructor(q);
+	if (f->destructor)
+		f->destructor(q);
+	kfree(q);
 
 }
 EXPORT_SYMBOL(inet_frag_destroy);
@@ -172,3 +174,88 @@ int inet_frag_evictor(struct inet_frags *f)
 	return evicted;
 }
 EXPORT_SYMBOL(inet_frag_evictor);
+
+static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
+		struct inet_frags *f, unsigned int hash, void *arg)
+{
+	struct inet_frag_queue *qp;
+#ifdef CONFIG_SMP
+	struct hlist_node *n;
+#endif
+
+	write_lock(&f->lock);
+#ifdef CONFIG_SMP
+	/* With SMP race we have to recheck hash table, because
+	 * such entry could be created on other cpu, while we
+	 * promoted read lock to write lock.
+	 */
+	hlist_for_each_entry(qp, n, &f->hash[hash], list) {
+		if (f->match(qp, arg)) {
+			atomic_inc(&qp->refcnt);
+			write_unlock(&f->lock);
+			qp_in->last_in |= COMPLETE;
+			inet_frag_put(qp_in, f);
+			return qp;
+		}
+	}
+#endif
+	qp = qp_in;
+	if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout))
+		atomic_inc(&qp->refcnt);
+
+	atomic_inc(&qp->refcnt);
+	hlist_add_head(&qp->list, &f->hash[hash]);
+	list_add_tail(&qp->lru_list, &f->lru_list);
+	f->nqueues++;
+	write_unlock(&f->lock);
+	return qp;
+}
+
+static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
+{
+	struct inet_frag_queue *q;
+
+	q = kzalloc(f->qsize, GFP_ATOMIC);
+	if (q == NULL)
+		return NULL;
+
+	f->constructor(q, arg);
+	atomic_add(f->qsize, &f->mem);
+	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+	spin_lock_init(&q->lock);
+	atomic_set(&q->refcnt, 1);
+
+	return q;
+}
+
+static struct inet_frag_queue *inet_frag_create(struct inet_frags *f,
+		void *arg, unsigned int hash)
+{
+	struct inet_frag_queue *q;
+
+	q = inet_frag_alloc(f, arg);
+	if (q == NULL)
+		return NULL;
+
+	return inet_frag_intern(q, f, hash, arg);
+}
+
+struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
+		unsigned int hash)
+{
+	struct inet_frag_queue *q;
+	struct hlist_node *n;
+
+	read_lock(&f->lock);
+	hlist_for_each_entry(q, n, &f->hash[hash], list) {
+		if (f->match(q, key)) {
+			atomic_inc(&q->refcnt);
+			read_unlock(&f->lock);
+			return q;
+		}
+	}
+	read_unlock(&f->lock);
+
+	return inet_frag_create(f, key, hash);
+}
+EXPORT_SYMBOL(inet_frag_find);
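
Taken together, these additions give the generic layer a find-or-create path: inet_frag_find() scans the hash bucket under the read lock and falls back to inet_frag_create(), which allocates via the protocol's constructor and then interns the queue under the write lock. A sketch of how a protocol is expected to plug in — the struct inet_frags fields and the inet_frag_find() signature come from this diff; everything named myproto_* is a hypothetical stand-in:

/*
 * Illustrative sketch only. The protocol embeds the generic queue as
 * its first member and supplies key-handling callbacks; the generic
 * code owns allocation, hashing, timers and refcounting.
 */
struct myproto_queue {
	struct inet_frag_queue	q;	/* embedded generic queue */
	u32			key;	/* protocol-specific lookup key */
};

static struct inet_frags myproto_frags;

static void myproto_frag_init(struct inet_frag_queue *q, void *arg)
{
	struct myproto_queue *mq = container_of(q, struct myproto_queue, q);

	mq->key = *(u32 *)arg;		/* copy lookup keys at alloc time */
}

static int myproto_frag_match(struct inet_frag_queue *q, void *arg)
{
	struct myproto_queue *mq = container_of(q, struct myproto_queue, q);

	return mq->key == *(u32 *)arg;
}

static struct myproto_queue *myproto_find(u32 *key, unsigned int hash)
{
	struct inet_frag_queue *q;

	/* find-or-create: read-locked hash scan, else alloc + intern */
	q = inet_frag_find(&myproto_frags, key, hash);
	if (q == NULL)
		return NULL;
	return container_of(q, struct myproto_queue, q);
}

static void __init myproto_frags_init(void)
{
	/* mirrors the ipfrag_init() hunk below */
	myproto_frags.constructor = myproto_frag_init;
	myproto_frags.match = myproto_frag_match;
	myproto_frags.qsize = sizeof(struct myproto_queue);
	inet_frags_init(&myproto_frags);
}
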
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 443b3f89192f..2143bf30597a 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -108,6 +108,11 @@ int ip_frag_mem(void)
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 			 struct net_device *dev);
 
+struct ip4_create_arg {
+	struct iphdr *iph;
+	u32 user;
+};
+
 static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 {
 	return jhash_3words((__force u32)id << 16 | prot,
@@ -123,6 +128,19 @@ static unsigned int ip4_hashfn(struct inet_frag_queue *q)
 	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
 }
 
+static int ip4_frag_match(struct inet_frag_queue *q, void *a)
+{
+	struct ipq *qp;
+	struct ip4_create_arg *arg = a;
+
+	qp = container_of(q, struct ipq, q);
+	return (qp->id == arg->iph->id &&
+			qp->saddr == arg->iph->saddr &&
+			qp->daddr == arg->iph->daddr &&
+			qp->protocol == arg->iph->protocol &&
+			qp->user == arg->user);
+}
+
 /* Memory Tracking Functions. */
 static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
@@ -132,6 +150,20 @@ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
 	kfree_skb(skb);
 }
 
+static void ip4_frag_init(struct inet_frag_queue *q, void *a)
+{
+	struct ipq *qp = container_of(q, struct ipq, q);
+	struct ip4_create_arg *arg = a;
+
+	qp->protocol = arg->iph->protocol;
+	qp->id = arg->iph->id;
+	qp->saddr = arg->iph->saddr;
+	qp->daddr = arg->iph->daddr;
+	qp->user = arg->user;
+	qp->peer = sysctl_ipfrag_max_dist ?
+			inet_getpeer(arg->iph->saddr, 1) : NULL;
+}
+
 static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 {
 	struct ipq *qp;
@@ -139,17 +171,6 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 	qp = container_of(q, struct ipq, q);
 	if (qp->peer)
 		inet_putpeer(qp->peer);
-	kfree(qp);
-}
-
-static __inline__ struct ipq *frag_alloc_queue(void)
-{
-	struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC);
-
-	if (!qp)
-		return NULL;
-	atomic_add(sizeof(struct ipq), &ip4_frags.mem);
-	return qp;
 }
 
 
@@ -185,7 +206,9 @@ static void ip_evictor(void)
  */
 static void ip_expire(unsigned long arg)
 {
-	struct ipq *qp = (struct ipq *) arg;
+	struct ipq *qp;
+
+	qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
 
 	spin_lock(&qp->q.lock);
 
@@ -210,112 +233,30 @@ out:
 	ipq_put(qp);
 }
 
-/* Creation primitives. */
-
-static struct ipq *ip_frag_intern(struct ipq *qp_in)
+/* Find the correct entry in the "incomplete datagrams" queue for
+ * this IP datagram, and create new one, if nothing is found.
+ */
+static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 {
-	struct ipq *qp;
-#ifdef CONFIG_SMP
-	struct hlist_node *n;
-#endif
+	struct inet_frag_queue *q;
+	struct ip4_create_arg arg;
 	unsigned int hash;
 
-	write_lock(&ip4_frags.lock);
-	hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
-			 qp_in->protocol);
-#ifdef CONFIG_SMP
-	/* With SMP race we have to recheck hash table, because
-	 * such entry could be created on other cpu, while we
-	 * promoted read lock to write lock.
-	 */
-	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
-		if (qp->id == qp_in->id &&
-		    qp->saddr == qp_in->saddr &&
-		    qp->daddr == qp_in->daddr &&
-		    qp->protocol == qp_in->protocol &&
-		    qp->user == qp_in->user) {
-			atomic_inc(&qp->q.refcnt);
-			write_unlock(&ip4_frags.lock);
-			qp_in->q.last_in |= COMPLETE;
-			ipq_put(qp_in);
-			return qp;
-		}
-	}
-#endif
-	qp = qp_in;
-
-	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout))
-		atomic_inc(&qp->q.refcnt);
+	arg.iph = iph;
+	arg.user = user;
+	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
-	atomic_inc(&qp->q.refcnt);
-	hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
-	INIT_LIST_HEAD(&qp->q.lru_list);
-	list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
-	ip4_frags.nqueues++;
-	write_unlock(&ip4_frags.lock);
-	return qp;
-}
-
-/* Add an entry to the 'ipq' queue for a newly received IP datagram. */
-static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
-{
-	struct ipq *qp;
-
-	if ((qp = frag_alloc_queue()) == NULL)
+	q = inet_frag_find(&ip4_frags, &arg, hash);
+	if (q == NULL)
 		goto out_nomem;
 
-	qp->protocol = iph->protocol;
-	qp->id = iph->id;
-	qp->saddr = iph->saddr;
-	qp->daddr = iph->daddr;
-	qp->user = user;
-	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
-
-	/* Initialize a timer for this entry. */
-	init_timer(&qp->q.timer);
-	qp->q.timer.data = (unsigned long) qp;	/* pointer to queue */
-	qp->q.timer.function = ip_expire;	/* expire function */
-	spin_lock_init(&qp->q.lock);
-	atomic_set(&qp->q.refcnt, 1);
-
-	return ip_frag_intern(qp);
+	return container_of(q, struct ipq, q);
 
 out_nomem:
 	LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
 	return NULL;
 }
 
-/* Find the correct entry in the "incomplete datagrams" queue for
- * this IP datagram, and create new one, if nothing is found.
- */
-static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
-{
-	__be16 id = iph->id;
-	__be32 saddr = iph->saddr;
-	__be32 daddr = iph->daddr;
-	__u8 protocol = iph->protocol;
-	unsigned int hash;
-	struct ipq *qp;
-	struct hlist_node *n;
-
-	read_lock(&ip4_frags.lock);
-	hash = ipqhashfn(id, saddr, daddr, protocol);
-	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
-		if (qp->id == id &&
-		    qp->saddr == saddr &&
-		    qp->daddr == daddr &&
-		    qp->protocol == protocol &&
-		    qp->user == user) {
-			atomic_inc(&qp->q.refcnt);
-			read_unlock(&ip4_frags.lock);
-			return qp;
-		}
-	}
-	read_unlock(&ip4_frags.lock);
-
-	return ip_frag_create(iph, user);
-}
-
 /* Is the fragment too far ahead to be part of ipq? */
 static inline int ip_frag_too_far(struct ipq *qp)
 {
@@ -545,7 +486,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	if (prev) {
 		head = prev->next;
 		fp = skb_clone(head, GFP_ATOMIC);
-
 		if (!fp)
 			goto out_nomem;
 
@@ -571,7 +511,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
-	err = -ENOMEM;
 	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
 		goto out_nomem;
 
@@ -627,6 +566,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 out_nomem:
 	LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
 			"queue %p\n", qp);
+	err = -ENOMEM;
 	goto out_fail;
 out_oversize:
 	if (net_ratelimit())
@@ -671,9 +611,12 @@ void __init ipfrag_init(void)
 {
 	ip4_frags.ctl = &ip4_frags_ctl;
 	ip4_frags.hashfn = ip4_hashfn;
+	ip4_frags.constructor = ip4_frag_init;
 	ip4_frags.destructor = ip4_frag_free;
 	ip4_frags.skb_free = NULL;
 	ip4_frags.qsize = sizeof(struct ipq);
+	ip4_frags.match = ip4_frag_match;
+	ip4_frags.frag_expire = ip_expire;
 	inet_frags_init(&ip4_frags);
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0f00966b1784..9288220b73a8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1121,7 +1121,7 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
 	struct sk_buff *skb;
 	int flag = 0;
 	int cnt = 0;
-	u32 new_low_seq = 0;
+	u32 new_low_seq = tp->snd_nxt;
 
 	tcp_for_write_queue(skb, sk) {
 		u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
@@ -1153,7 +1153,7 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
 				NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
 			}
 		} else {
-			if (!new_low_seq || before(ack_seq, new_low_seq))
+			if (before(ack_seq, new_low_seq))
 				new_low_seq = ack_seq;
 			cnt += tcp_skb_pcount(skb);
 		}
@@ -1242,7 +1242,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
 	int reord = tp->packets_out;
 	int prior_fackets;
-	u32 highest_sack_end_seq = 0;
+	u32 highest_sack_end_seq = tp->lost_retrans_low;
 	int flag = 0;
 	int found_dup_sack = 0;
 	int cached_fack_count;
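
Both tcp_input.c hunks fix the same pattern: 0 was being used as a "not yet set" sentinel for sequence numbers, but TCP sequence arithmetic is modular, so 0 is a legitimate value and half the sequence space compares before() it. A small, self-contained illustration — the before() helper is quoted from include/net/tcp.h; the asserts are ours:

#include <assert.h>
#include <stdint.h>

/* 32-bit modular sequence comparison, as in include/net/tcp.h:
 * seq1 is "before" seq2 iff the signed difference is negative. */
static int before(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) < 0;
}

int main(void)
{
	/* The space wraps: a just-wrapped value compares after one
	 * from just below 2^32. */
	assert(before(0xfffffff0u, 0x00000010u));
	assert(!before(0x00000010u, 0xfffffff0u));
	/* Why 0 is a poor sentinel: half the space is before(x, 0),
	 * and a legitimate sequence value of 0 would read as "unset".
	 * Seeding new_low_seq with tp->snd_nxt (an upper bound on
	 * in-flight data) removes the special case entirely. */
	assert(before(0x80000001u, 0));
	return 0;
}
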
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e9bbfde19ac3..5e95c8a07efb 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -16,19 +16,6 @@
 #include <net/ip.h>
 #include <net/xfrm.h>
 
-static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
-{
-	switch (nexthdr) {
-	case IPPROTO_IPIP:
-	case IPPROTO_IPV6:
-		*spi = ip_hdr(skb)->saddr;
-		*seq = 0;
-		return 0;
-	}
-
-	return xfrm_parse_spi(skb, nexthdr, spi, seq);
-}
-
 #ifdef CONFIG_NETFILTER
 static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
 {
@@ -46,28 +33,29 @@ drop:
 }
 #endif
 
-static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
+int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
+		    int encap_type)
 {
-	__be32 spi, seq;
+	int err;
+	__be32 seq;
 	struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
 	struct xfrm_state *x;
 	int xfrm_nr = 0;
 	int decaps = 0;
-	int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
 	unsigned int nhoff = offsetof(struct iphdr, protocol);
 
-	if (err != 0)
+	seq = 0;
+	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
 		goto drop;
 
 	do {
 		const struct iphdr *iph = ip_hdr(skb);
-		int nexthdr;
 
 		if (xfrm_nr == XFRM_MAX_DEPTH)
 			goto drop;
 
 		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
-				      iph->protocol != IPPROTO_IPV6 ? iph->protocol : IPPROTO_IPIP, AF_INET);
+				      nexthdr, AF_INET);
 		if (x == NULL)
 			goto drop;
 
@@ -103,15 +91,15 @@ static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 
 		xfrm_vec[xfrm_nr++] = x;
 
-		if (x->mode->input(x, skb))
+		if (x->outer_mode->input(x, skb))
 			goto drop;
 
-		if (x->props.mode == XFRM_MODE_TUNNEL) {
+		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 			decaps = 1;
 			break;
 		}
 
-		err = xfrm_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
+		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
 		if (err < 0)
 			goto drop;
 	} while (!err);
@@ -165,6 +153,7 @@ drop:
 	kfree_skb(skb);
 	return 0;
 }
+EXPORT_SYMBOL(xfrm4_rcv_encap);
 
 /* If it's a keepalive packet, then just eat it.
  * If it's an encapsulated packet, then pass it to the
@@ -252,11 +241,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	__skb_pull(skb, len);
 	skb_reset_transport_header(skb);
 
-	/* modify the protocol (it's ESP!) */
-	iph->protocol = IPPROTO_ESP;
-
 	/* process ESP */
-	ret = xfrm4_rcv_encap(skb, encap_type);
+	ret = xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
 	return ret;
 
 drop:
@@ -266,7 +252,7 @@ drop:
 
 int xfrm4_rcv(struct sk_buff *skb)
 {
-	return xfrm4_rcv_encap(skb, 0);
+	return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
 }
 
 EXPORT_SYMBOL(xfrm4_rcv);
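
Note that xfrm4_rcv() now calls xfrm4_rcv_spi(), which this diff does not show. Judging from its call sites here and in xfrm4_tunnel.c below, it is presumably a thin wrapper that forwards to the reworked xfrm4_rcv_encap() with an encap_type of 0, along the lines of:

/* Sketch inferred from the call sites, not part of this diff: a zero
 * spi makes xfrm4_rcv_encap() parse the SPI out of the packet itself,
 * while a non-zero spi is used for the lookup as-is. */
int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
{
	return xfrm4_rcv_encap(skb, nexthdr, spi, 0);
}
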
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 73d2338bec55..e42e122414be 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -114,6 +114,7 @@ static struct xfrm_mode xfrm4_beet_mode = {
 	.output = xfrm4_beet_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_BEET,
+	.flags = XFRM_MODE_FLAG_TUNNEL,
 };
 
 static int __init xfrm4_beet_init(void)
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 1ae9d32276f0..e4deecba6dd2 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -139,6 +139,7 @@ static struct xfrm_mode xfrm4_tunnel_mode = {
 	.output = xfrm4_tunnel_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_TUNNEL,
+	.flags = XFRM_MODE_FLAG_TUNNEL,
 };
 
 static int __init xfrm4_tunnel_init(void)
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index a4edd666318b..c4a7156962bd 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -47,7 +47,7 @@ static inline int xfrm4_output_one(struct sk_buff *skb)
 	struct iphdr *iph;
 	int err;
 
-	if (x->props.mode == XFRM_MODE_TUNNEL) {
+	if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
 		err = xfrm4_tunnel_check_size(skb);
 		if (err)
 			goto error_nolock;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 329825ca68fe..cc86fb110dd8 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -117,7 +117,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		header_len += xfrm[i]->props.header_len;
 		trailer_len += xfrm[i]->props.trailer_len;
 
-		if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
+		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
 			unsigned short encap_family = xfrm[i]->props.family;
 			switch (encap_family) {
 			case AF_INET:
@@ -151,7 +151,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 	i = 0;
 	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
 		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
-		struct xfrm_state_afinfo *afinfo;
 		x->u.rt.fl = *fl;
 
 		dst_prev->xfrm = xfrm[i++];
@@ -169,27 +168,17 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		/* Copy neighbout for reachability confirmation */
 		dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour);
 		dst_prev->input = rt->u.dst.input;
-		/* XXX: When IPv6 module can be unloaded, we should manage reference
-		 * to xfrm6_output in afinfo->output. Miyazawa
-		 * */
-		afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family);
-		if (!afinfo) {
-			dst = *dst_p;
-			err = -EAFNOSUPPORT;
-			goto error;
-		}
-		dst_prev->output = afinfo->output;
-		xfrm_state_put_afinfo(afinfo);
-		if (dst_prev->xfrm->props.family == AF_INET && rt->peer)
-			atomic_inc(&rt->peer->refcnt);
-		x->u.rt.peer = rt->peer;
+		dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
+		if (rt0->peer)
+			atomic_inc(&rt0->peer->refcnt);
+		x->u.rt.peer = rt0->peer;
 		/* Sheit... I remember I did this right. Apparently,
 		 * it was magically lost, so this code needs audit */
 		x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
-		x->u.rt.rt_type = rt->rt_type;
+		x->u.rt.rt_type = rt0->rt_type;
 		x->u.rt.rt_src = rt0->rt_src;
 		x->u.rt.rt_dst = rt0->rt_dst;
-		x->u.rt.rt_gateway = rt->rt_gateway;
+		x->u.rt.rt_gateway = rt0->rt_gateway;
 		x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
 		x->u.rt.idev = rt0->idev;
 		in_dev_hold(rt0->idev);
@@ -291,7 +280,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 
 	if (likely(xdst->u.rt.idev))
 		in_dev_put(xdst->u.rt.idev);
-	if (dst->xfrm && dst->xfrm->props.family == AF_INET && likely(xdst->u.rt.peer))
+	if (likely(xdst->u.rt.peer))
 		inet_putpeer(xdst->u.rt.peer);
 	xfrm_dst_destroy(xdst);
 }
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 93e2c061cdda..13d54a1c3337 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -49,6 +49,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 
 static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family = AF_INET,
+	.owner = THIS_MODULE,
 	.init_flags = xfrm4_init_flags,
 	.init_tempsel = __xfrm4_init_tempsel,
 	.output = xfrm4_output,
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 1312417608e2..326845195620 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -18,7 +18,7 @@ static int ipip_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb)
 {
-	return IPPROTO_IP;
+	return ip_hdr(skb)->protocol;
 }
 
 static int ipip_init_state(struct xfrm_state *x)
@@ -48,20 +48,25 @@ static struct xfrm_type ipip_type = {
 	.output = ipip_output
 };
 
+static int xfrm_tunnel_rcv(struct sk_buff *skb)
+{
+	return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr);
+}
+
 static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)
 {
 	return -ENOENT;
 }
 
 static struct xfrm_tunnel xfrm_tunnel_handler = {
-	.handler = xfrm4_rcv,
+	.handler = xfrm_tunnel_rcv,
 	.err_handler = xfrm_tunnel_err,
 	.priority = 2,
 };
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static struct xfrm_tunnel xfrm64_tunnel_handler = {
-	.handler = xfrm4_rcv,
+	.handler = xfrm_tunnel_rcv,
 	.err_handler = xfrm_tunnel_err,
 	.priority = 2,
 };
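
The new xfrm_tunnel_rcv() exists because IPIP packets carry no SPI field: the outer source address is passed as a pseudo-SPI, which is the same value the xfrm4_parse_spi() special case deleted from xfrm4_input.c used to synthesize for IPPROTO_IPIP/IPPROTO_IPV6. Since that pseudo-SPI is non-zero, xfrm4_rcv_encap() skips SPI parsing and feeds it straight to xfrm_state_lookup(). A restatement of the pairing across the two files, using only names from this diff:

/* xfrm4_tunnel.c: outer saddr stands in for the missing SPI ... */
static int xfrm_tunnel_rcv(struct sk_buff *skb)
{
	return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr);
}

/* ... and in xfrm4_input.c, a non-zero spi bypasses header parsing:
 *	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
 *		goto drop;
 * so the state lookup keys directly on the tunnel's source address. */
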