-rw-r--r--  include/net/xfrm.h      |   4
-rw-r--r--  net/core/dev.c          |   2
-rw-r--r--  net/ipv4/ah4.c          |   3
-rw-r--r--  net/ipv4/esp4.c         | 332
-rw-r--r--  net/ipv4/xfrm4_state.c  |   8
-rw-r--r--  net/ipv6/ah6.c          |   3
-rw-r--r--  net/ipv6/esp6.c         | 318
-rw-r--r--  net/xfrm/xfrm_input.c   |  12
-rw-r--r--  net/xfrm/xfrm_output.c  |   8
-rw-r--r--  net/xfrm/xfrm_policy.c  |   2
-rw-r--r--  net/xfrm/xfrm_state.c   |  86
11 files changed, 578 insertions(+), 200 deletions(-)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 31947b9c21d6..d9a81dcef53e 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -213,6 +213,8 @@ struct xfrm_state {
 	/* Last used time */
 	unsigned long		lastused;
 
+	struct page_frag xfrag;
+
 	/* Reference to data common to all the instances of this
	 * transformer. */
 	const struct xfrm_type	*type;
@@ -343,7 +345,7 @@ struct xfrm_state_afinfo {
 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo);
 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo);
 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
-void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
+struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family);
 
 struct xfrm_input_afinfo {
 	unsigned int		family;
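For reference (not part of the patch): struct page_frag, which the new xfrag member embeds, is the small allocator cursor from include/linux/mm_types.h that skb_page_frag_refill() keeps topped up. On 64-bit configurations it looks roughly like this (the field widths shrink to __u16 on some 32-bit configs):

struct page_frag {
	struct page *page;	/* current backing page */
	__u32 offset;		/* next free byte within that page */
	__u32 size;		/* total usable bytes in the page */
};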
diff --git a/net/core/dev.c b/net/core/dev.c
index be11abac89b3..e61528c50209 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4593,6 +4593,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 	case GRO_MERGED_FREE:
 		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
 			skb_dst_drop(skb);
+			secpath_reset(skb);
 			kmem_cache_free(skbuff_head_cache, skb);
 		} else {
 			__kfree_skb(skb);
@@ -4633,6 +4634,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 	skb->encapsulation = 0;
 	skb_shinfo(skb)->gso_type = 0;
 	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+	secpath_reset(skb);
 
 	napi->skb = skb;
 }
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index f2a71025a770..22377c8ff14b 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -270,6 +270,9 @@ static void ah_input_done(struct crypto_async_request *base, int err)
 	int ihl = ip_hdrlen(skb);
 	int ah_hlen = (ah->hdrlen + 2) << 2;
 
+	if (err)
+		goto out;
+
 	work_iph = AH_SKB_CB(skb)->tmp;
 	auth_data = ah_tmp_auth(work_iph, ihl);
 	icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 20fb25e3027b..b1e24446e297 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -18,6 +18,8 @@
 #include <net/protocol.h>
 #include <net/udp.h>
 
+#include <linux/highmem.h>
+
 struct esp_skb_cb {
 	struct xfrm_skb_cb xfrm;
 	void *tmp;
@@ -92,11 +94,40 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
 			     __alignof__(struct scatterlist));
 }
 
+static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
+{
+	struct esp_output_extra *extra = esp_tmp_extra(tmp);
+	struct crypto_aead *aead = x->data;
+	int extralen = 0;
+	u8 *iv;
+	struct aead_request *req;
+	struct scatterlist *sg;
+
+	if (x->props.flags & XFRM_STATE_ESN)
+		extralen += sizeof(*extra);
+
+	extra = esp_tmp_extra(tmp);
+	iv = esp_tmp_iv(aead, tmp, extralen);
+	req = esp_tmp_req(aead, iv);
+
+	/* Unref skb_frag_pages in the src scatterlist if necessary.
+	 * Skip the first sg which comes from skb->data.
+	 */
+	if (req->src != req->dst)
+		for (sg = sg_next(req->src); sg; sg = sg_next(sg))
+			put_page(sg_page(sg));
+}
+
 static void esp_output_done(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
+	void *tmp;
+	struct dst_entry *dst = skb_dst(skb);
+	struct xfrm_state *x = dst->xfrm;
 
-	kfree(ESP_SKB_CB(skb)->tmp);
+	tmp = ESP_SKB_CB(skb)->tmp;
+	esp_ssg_unref(x, tmp);
+	kfree(tmp);
 	xfrm_output_resume(skb, err);
 }
 
@@ -120,6 +151,29 @@ static void esp_output_restore_header(struct sk_buff *skb)
 			    sizeof(__be32));
 }
 
+static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb,
+					       struct ip_esp_hdr *esph,
+					       struct esp_output_extra *extra)
+{
+	struct xfrm_state *x = skb_dst(skb)->xfrm;
+
+	/* For ESN we move the header forward by 4 bytes to
+	 * accomodate the high bits. We will move it back after
+	 * encryption.
+	 */
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		extra->esphoff = (unsigned char *)esph -
+				 skb_transport_header(skb);
+		esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
+		extra->seqhi = esph->spi;
+		esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+	}
+
+	esph->spi = x->id.spi;
+
+	return esph;
+}
+
 static void esp_output_done_esn(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
@@ -128,18 +182,36 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
 	esp_output_done(base, err);
 }
 
+static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
+{
+	/* Fill padding... */
+	if (tfclen) {
+		memset(tail, 0, tfclen);
+		tail += tfclen;
+	}
+	do {
+		int i;
+		for (i = 0; i < plen - 2; i++)
+			tail[i] = i + 1;
+	} while (0);
+	tail[plen - 2] = plen - 2;
+	tail[plen - 1] = proto;
+}
+
 static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int err;
 	struct esp_output_extra *extra;
+	int err = -ENOMEM;
 	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead;
 	struct aead_request *req;
-	struct scatterlist *sg;
+	struct scatterlist *sg, *dsg;
 	struct sk_buff *trailer;
+	struct page *page;
 	void *tmp;
 	u8 *iv;
 	u8 *tail;
+	u8 *vaddr;
 	int blksize;
 	int clen;
 	int alen;
@@ -149,7 +221,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	int nfrags;
 	int assoclen;
 	int extralen;
+	int tailen;
 	__be64 seqno;
+	__u8 proto = *skb_mac_header(skb);
 
 	/* skb is pure payload to encrypt */
 
@@ -169,12 +243,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	blksize = ALIGN(crypto_aead_blocksize(aead), 4);
 	clen = ALIGN(skb->len + 2 + tfclen, blksize);
 	plen = clen - skb->len - tfclen;
-
-	err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
-	if (err < 0)
-		goto error;
-	nfrags = err;
-
+	tailen = tfclen + plen + alen;
 	assoclen = sizeof(*esph);
 	extralen = 0;
 
@@ -183,35 +252,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 		assoclen += sizeof(__be32);
 	}
 
-	tmp = esp_alloc_tmp(aead, nfrags, extralen);
-	if (!tmp) {
-		err = -ENOMEM;
-		goto error;
-	}
-
-	extra = esp_tmp_extra(tmp);
-	iv = esp_tmp_iv(aead, tmp, extralen);
-	req = esp_tmp_req(aead, iv);
-	sg = esp_req_sg(aead, req);
-
-	/* Fill padding... */
-	tail = skb_tail_pointer(trailer);
-	if (tfclen) {
-		memset(tail, 0, tfclen);
-		tail += tfclen;
-	}
-	do {
-		int i;
-		for (i = 0; i < plen - 2; i++)
-			tail[i] = i + 1;
-	} while (0);
-	tail[plen - 2] = plen - 2;
-	tail[plen - 1] = *skb_mac_header(skb);
-	pskb_put(skb, trailer, clen - skb->len + alen);
-
-	skb_push(skb, -skb_network_offset(skb));
-	esph = ip_esp_hdr(skb);
 	*skb_mac_header(skb) = IPPROTO_ESP;
+	esph = ip_esp_hdr(skb);
 
 	/* this is non-NULL only with UDP Encapsulation */
 	if (x->encap) {
@@ -230,7 +272,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 		uh = (struct udphdr *)esph;
 		uh->source = sport;
 		uh->dest = dport;
-		uh->len = htons(skb->len - skb_transport_offset(skb));
+		uh->len = htons(skb->len + tailen
+			       - skb_transport_offset(skb));
 		uh->check = 0;
 
 		switch (encap_type) {
@@ -248,31 +291,148 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 		*skb_mac_header(skb) = IPPROTO_UDP;
 	}
 
-	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+	if (!skb_cloned(skb)) {
+		if (tailen <= skb_availroom(skb)) {
+			nfrags = 1;
+			trailer = skb;
+			tail = skb_tail_pointer(trailer);
 
-	aead_request_set_callback(req, 0, esp_output_done, skb);
+			goto skip_cow;
+		} else if ((skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS)
+			   && !skb_has_frag_list(skb)) {
+			int allocsize;
+			struct sock *sk = skb->sk;
+			struct page_frag *pfrag = &x->xfrag;
 
-	/* For ESN we move the header forward by 4 bytes to
-	 * accomodate the high bits. We will move it back after
-	 * encryption.
-	 */
-	if ((x->props.flags & XFRM_STATE_ESN)) {
-		extra->esphoff = (unsigned char *)esph -
-				 skb_transport_header(skb);
-		esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
-		extra->seqhi = esph->spi;
-		esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
-		aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+			allocsize = ALIGN(tailen, L1_CACHE_BYTES);
+
+			spin_lock_bh(&x->lock);
+
+			if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+				spin_unlock_bh(&x->lock);
+				goto cow;
+			}
+
+			page = pfrag->page;
+			get_page(page);
+
+			vaddr = kmap_atomic(page);
+
+			tail = vaddr + pfrag->offset;
+
+			esp_output_fill_trailer(tail, tfclen, plen, proto);
+
+			kunmap_atomic(vaddr);
+
+			nfrags = skb_shinfo(skb)->nr_frags;
+
+			__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
+					     tailen);
+			skb_shinfo(skb)->nr_frags = ++nfrags;
+
+			pfrag->offset = pfrag->offset + allocsize;
+			nfrags++;
+
+			skb->len += tailen;
+			skb->data_len += tailen;
+			skb->truesize += tailen;
+			if (sk)
+				atomic_add(tailen, &sk->sk_wmem_alloc);
+
+			skb_push(skb, -skb_network_offset(skb));
+
+			esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+			esph->spi = x->id.spi;
+
+			tmp = esp_alloc_tmp(aead, nfrags + 2, extralen);
+			if (!tmp) {
+				spin_unlock_bh(&x->lock);
+				err = -ENOMEM;
+				goto error;
+			}
+
+			extra = esp_tmp_extra(tmp);
+			iv = esp_tmp_iv(aead, tmp, extralen);
+			req = esp_tmp_req(aead, iv);
+			sg = esp_req_sg(aead, req);
+			dsg = &sg[nfrags];
+
+			esph = esp_output_set_extra(skb, esph, extra);
+
+			sg_init_table(sg, nfrags);
+			skb_to_sgvec(skb, sg,
+				     (unsigned char *)esph - skb->data,
+				     assoclen + ivlen + clen + alen);
+
+			allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
+
+			if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+				spin_unlock_bh(&x->lock);
+				err = -ENOMEM;
+				goto error;
+			}
+
+			skb_shinfo(skb)->nr_frags = 1;
+
+			page = pfrag->page;
+			get_page(page);
+			/* replace page frags in skb with new page */
+			__skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
+			pfrag->offset = pfrag->offset + allocsize;
+
+			sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
+			skb_to_sgvec(skb, dsg,
+				     (unsigned char *)esph - skb->data,
+				     assoclen + ivlen + clen + alen);
+
+			spin_unlock_bh(&x->lock);
+
+			goto skip_cow2;
+		}
 	}
 
+cow:
+	err = skb_cow_data(skb, tailen, &trailer);
+	if (err < 0)
+		goto error;
+	nfrags = err;
+	tail = skb_tail_pointer(trailer);
+	esph = ip_esp_hdr(skb);
+
+skip_cow:
+	esp_output_fill_trailer(tail, tfclen, plen, proto);
+
+	pskb_put(skb, trailer, clen - skb->len + alen);
+	skb_push(skb, -skb_network_offset(skb));
+	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 	esph->spi = x->id.spi;
 
+	tmp = esp_alloc_tmp(aead, nfrags, extralen);
+	if (!tmp) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	extra = esp_tmp_extra(tmp);
+	iv = esp_tmp_iv(aead, tmp, extralen);
+	req = esp_tmp_req(aead, iv);
+	sg = esp_req_sg(aead, req);
+	dsg = sg;
+
+	esph = esp_output_set_extra(skb, esph, extra);
+
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg,
 		     (unsigned char *)esph - skb->data,
 		     assoclen + ivlen + clen + alen);
 
-	aead_request_set_crypt(req, sg, sg, ivlen + clen, iv);
+skip_cow2:
+	if ((x->props.flags & XFRM_STATE_ESN))
+		aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+	else
+		aead_request_set_callback(req, 0, esp_output_done, skb);
+
+	aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv);
 	aead_request_set_ad(req, assoclen);
 
 	seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
@@ -298,6 +458,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 		esp_output_restore_header(skb);
 	}
 
+	if (sg != dsg)
+		esp_ssg_unref(x, tmp);
 	kfree(tmp);
 
 error:
@@ -401,6 +563,23 @@ static void esp_input_restore_header(struct sk_buff *skb)
 	__skb_pull(skb, 4);
 }
 
+static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
+{
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
+
+	/* For ESN we move the header forward by 4 bytes to
+	 * accomodate the high bits. We will move it back after
+	 * decryption.
+	 */
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		esph = (void *)skb_push(skb, 4);
+		*seqhi = esph->spi;
+		esph->spi = esph->seq_no;
+		esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+	}
+}
+
 static void esp_input_done_esn(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
@@ -437,12 +616,6 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (elen <= 0)
 		goto out;
 
-	err = skb_cow_data(skb, 0, &trailer);
-	if (err < 0)
-		goto out;
-
-	nfrags = err;
-
 	assoclen = sizeof(*esph);
 	seqhilen = 0;
 
@@ -451,6 +624,26 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 		assoclen += seqhilen;
 	}
 
+	if (!skb_cloned(skb)) {
+		if (!skb_is_nonlinear(skb)) {
+			nfrags = 1;
+
+			goto skip_cow;
+		} else if (!skb_has_frag_list(skb)) {
+			nfrags = skb_shinfo(skb)->nr_frags;
+			nfrags++;
+
+			goto skip_cow;
+		}
+	}
+
+	err = skb_cow_data(skb, 0, &trailer);
+	if (err < 0)
+		goto out;
+
+	nfrags = err;
+
+skip_cow:
 	err = -ENOMEM;
 	tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
 	if (!tmp)
@@ -462,26 +655,17 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	req = esp_tmp_req(aead, iv);
 	sg = esp_req_sg(aead, req);
 
-	skb->ip_summed = CHECKSUM_NONE;
+	esp_input_set_header(skb, seqhi);
 
-	esph = (struct ip_esp_hdr *)skb->data;
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg, 0, skb->len);
 
-	aead_request_set_callback(req, 0, esp_input_done, skb);
+	skb->ip_summed = CHECKSUM_NONE;
 
-	/* For ESN we move the header forward by 4 bytes to
-	 * accomodate the high bits. We will move it back after
-	 * decryption.
-	 */
-	if ((x->props.flags & XFRM_STATE_ESN)) {
-		esph = (void *)skb_push(skb, 4);
-		*seqhi = esph->spi;
-		esph->spi = esph->seq_no;
-		esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+	if ((x->props.flags & XFRM_STATE_ESN))
 		aead_request_set_callback(req, 0, esp_input_done_esn, skb);
-	}
-
-	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg, 0, skb->len);
+	else
+		aead_request_set_callback(req, 0, esp_input_done, skb);
 
 	aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
 	aead_request_set_ad(req, assoclen);
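For context, here is a minimal sketch (not part of the patch; the helper name is hypothetical) of the fast-path trailer append that the new esp_output() performs inline: skb_page_frag_refill() guarantees at least allocsize free bytes at pfrag->offset in the per-state page, the ESP trailer is written through a temporary kernel mapping, and the fragment is then attached to the skb. The caller is assumed to hold x->lock, which is what serializes use of the shared cursor.

static int esp_append_tail_sketch(struct xfrm_state *x, struct sk_buff *skb,
				  int tfclen, int plen, int alen, u8 proto)
{
	struct page_frag *pfrag = &x->xfrag;	/* member added by this patch */
	int tailen = tfclen + plen + alen;
	int allocsize = ALIGN(tailen, L1_CACHE_BYTES);
	u8 *vaddr;

	if (!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))
		return -ENOMEM;		/* caller falls back to skb_cow_data() */

	get_page(pfrag->page);		/* reference now owned by the skb frag */

	vaddr = kmap_atomic(pfrag->page);	/* the page may live in highmem */
	esp_output_fill_trailer(vaddr + pfrag->offset, tfclen, plen, proto);
	kunmap_atomic(vaddr);

	/* attach the trailer as an extra page fragment and account for it */
	__skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, pfrag->page,
			     pfrag->offset, tailen);
	skb_shinfo(skb)->nr_frags++;
	pfrag->offset += allocsize;

	skb->len += tailen;
	skb->data_len += tailen;
	skb->truesize += tailen;

	return 0;
}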
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 542074c00c78..d6660a8c0ea5 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -90,11 +90,3 @@ void __init xfrm4_state_init(void)
 {
 	xfrm_state_register_afinfo(&xfrm4_state_afinfo);
 }
-
-#if 0
-void __exit xfrm4_state_fini(void)
-{
-	xfrm_state_unregister_afinfo(&xfrm4_state_afinfo);
-}
-#endif /* 0 */
-
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 189eb10b742d..dda6035e3b84 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -474,6 +474,9 @@ static void ah6_input_done(struct crypto_async_request *base, int err)
 	int hdr_len = skb_network_header_len(skb);
 	int ah_hlen = (ah->hdrlen + 2) << 2;
 
+	if (err)
+		goto out;
+
 	work_iph = AH_SKB_CB(skb)->tmp;
 	auth_data = ah_tmp_auth(work_iph, hdr_len);
 	icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index cbcdd5db31f4..ff54faa75631 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -44,6 +44,8 @@
 #include <net/protocol.h>
 #include <linux/icmpv6.h>
 
+#include <linux/highmem.h>
+
 struct esp_skb_cb {
 	struct xfrm_skb_cb xfrm;
 	void *tmp;
@@ -114,11 +116,40 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
 			     __alignof__(struct scatterlist));
 }
 
+static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
+{
+	__be32 *seqhi;
+	struct crypto_aead *aead = x->data;
+	int seqhilen = 0;
+	u8 *iv;
+	struct aead_request *req;
+	struct scatterlist *sg;
+
+	if (x->props.flags & XFRM_STATE_ESN)
+		seqhilen += sizeof(__be32);
+
+	seqhi = esp_tmp_seqhi(tmp);
+	iv = esp_tmp_iv(aead, tmp, seqhilen);
+	req = esp_tmp_req(aead, iv);
+
+	/* Unref skb_frag_pages in the src scatterlist if necessary.
+	 * Skip the first sg which comes from skb->data.
+	 */
+	if (req->src != req->dst)
+		for (sg = sg_next(req->src); sg; sg = sg_next(sg))
+			put_page(sg_page(sg));
+}
+
 static void esp_output_done(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
+	void *tmp;
+	struct dst_entry *dst = skb_dst(skb);
+	struct xfrm_state *x = dst->xfrm;
 
-	kfree(ESP_SKB_CB(skb)->tmp);
+	tmp = ESP_SKB_CB(skb)->tmp;
+	esp_ssg_unref(x, tmp);
+	kfree(tmp);
 	xfrm_output_resume(skb, err);
 }
 
@@ -138,6 +169,27 @@ static void esp_output_restore_header(struct sk_buff *skb)
 	esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
 }
 
+static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb,
+					     struct ip_esp_hdr *esph,
+					     __be32 *seqhi)
+{
+	struct xfrm_state *x = skb_dst(skb)->xfrm;
+
+	/* For ESN we move the header forward by 4 bytes to
+	 * accomodate the high bits. We will move it back after
+	 * encryption.
+	 */
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
+		*seqhi = esph->spi;
+		esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+	}
+
+	esph->spi = x->id.spi;
+
+	return esph;
+}
+
 static void esp_output_done_esn(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
@@ -146,14 +198,31 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
 	esp_output_done(base, err);
 }
 
+static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
+{
+	/* Fill padding... */
+	if (tfclen) {
+		memset(tail, 0, tfclen);
+		tail += tfclen;
+	}
+	do {
+		int i;
+		for (i = 0; i < plen - 2; i++)
+			tail[i] = i + 1;
+	} while (0);
+	tail[plen - 2] = plen - 2;
+	tail[plen - 1] = proto;
+}
+
 static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
 	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead;
 	struct aead_request *req;
-	struct scatterlist *sg;
+	struct scatterlist *sg, *dsg;
 	struct sk_buff *trailer;
+	struct page *page;
 	void *tmp;
 	int blksize;
 	int clen;
@@ -164,10 +233,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	int nfrags;
 	int assoclen;
 	int seqhilen;
+	int tailen;
 	u8 *iv;
 	u8 *tail;
+	u8 *vaddr;
 	__be32 *seqhi;
 	__be64 seqno;
+	__u8 proto = *skb_mac_header(skb);
 
 	/* skb is pure payload to encrypt */
 	aead = x->data;
@@ -186,11 +258,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	blksize = ALIGN(crypto_aead_blocksize(aead), 4);
 	clen = ALIGN(skb->len + 2 + tfclen, blksize);
 	plen = clen - skb->len - tfclen;
-
-	err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
-	if (err < 0)
-		goto error;
-	nfrags = err;
+	tailen = tfclen + plen + alen;
 
 	assoclen = sizeof(*esph);
 	seqhilen = 0;
@@ -200,59 +268,152 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 		assoclen += seqhilen;
 	}
 
-	tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
-	if (!tmp) {
-		err = -ENOMEM;
-		goto error;
+	*skb_mac_header(skb) = IPPROTO_ESP;
+	esph = ip_esp_hdr(skb);
+
+	if (!skb_cloned(skb)) {
+		if (tailen <= skb_availroom(skb)) {
+			nfrags = 1;
+			trailer = skb;
+			tail = skb_tail_pointer(trailer);
+
+			goto skip_cow;
+		} else if ((skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS)
+			   && !skb_has_frag_list(skb)) {
+			int allocsize;
+			struct sock *sk = skb->sk;
+			struct page_frag *pfrag = &x->xfrag;
+
+			allocsize = ALIGN(tailen, L1_CACHE_BYTES);
+
+			spin_lock_bh(&x->lock);
+
+			if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+				spin_unlock_bh(&x->lock);
+				goto cow;
+			}
+
+			page = pfrag->page;
+			get_page(page);
+
+			vaddr = kmap_atomic(page);
+
+			tail = vaddr + pfrag->offset;
+
+			esp_output_fill_trailer(tail, tfclen, plen, proto);
+
+			kunmap_atomic(vaddr);
+
+			nfrags = skb_shinfo(skb)->nr_frags;
+
+			__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
+					     tailen);
+			skb_shinfo(skb)->nr_frags = ++nfrags;
+
+			pfrag->offset = pfrag->offset + allocsize;
+			nfrags++;
+
+			skb->len += tailen;
+			skb->data_len += tailen;
+			skb->truesize += tailen;
+			if (sk)
+				atomic_add(tailen, &sk->sk_wmem_alloc);
+
+			skb_push(skb, -skb_network_offset(skb));
+
+			esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+			esph->spi = x->id.spi;
+
+			tmp = esp_alloc_tmp(aead, nfrags + 2, seqhilen);
+			if (!tmp) {
+				spin_unlock_bh(&x->lock);
+				err = -ENOMEM;
+				goto error;
+			}
+			seqhi = esp_tmp_seqhi(tmp);
+			iv = esp_tmp_iv(aead, tmp, seqhilen);
+			req = esp_tmp_req(aead, iv);
+			sg = esp_req_sg(aead, req);
+			dsg = &sg[nfrags];
+
+			esph = esp_output_set_esn(skb, esph, seqhi);
+
+			sg_init_table(sg, nfrags);
+			skb_to_sgvec(skb, sg,
+				     (unsigned char *)esph - skb->data,
+				     assoclen + ivlen + clen + alen);
+
+			allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
+
+			if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+				spin_unlock_bh(&x->lock);
+				err = -ENOMEM;
+				goto error;
+			}
+
+			skb_shinfo(skb)->nr_frags = 1;
+
+			page = pfrag->page;
+			get_page(page);
+			/* replace page frags in skb with new page */
+			__skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
+			pfrag->offset = pfrag->offset + allocsize;
+
+			sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
+			skb_to_sgvec(skb, dsg,
+				     (unsigned char *)esph - skb->data,
+				     assoclen + ivlen + clen + alen);
+
+			spin_unlock_bh(&x->lock);
+
+			goto skip_cow2;
+		}
 	}
 
-	seqhi = esp_tmp_seqhi(tmp);
-	iv = esp_tmp_iv(aead, tmp, seqhilen);
-	req = esp_tmp_req(aead, iv);
-	sg = esp_req_sg(aead, req);
+cow:
+	err = skb_cow_data(skb, tailen, &trailer);
+	if (err < 0)
+		goto error;
+	nfrags = err;
 
-	/* Fill padding... */
 	tail = skb_tail_pointer(trailer);
-	if (tfclen) {
-		memset(tail, 0, tfclen);
-		tail += tfclen;
-	}
-	do {
-		int i;
-		for (i = 0; i < plen - 2; i++)
-			tail[i] = i + 1;
-	} while (0);
-	tail[plen - 2] = plen - 2;
-	tail[plen - 1] = *skb_mac_header(skb);
-	pskb_put(skb, trailer, clen - skb->len + alen);
+	esph = ip_esp_hdr(skb);
 
+skip_cow:
+	esp_output_fill_trailer(tail, tfclen, plen, proto);
+
+	pskb_put(skb, trailer, clen - skb->len + alen);
 	skb_push(skb, -skb_network_offset(skb));
-	esph = ip_esp_hdr(skb);
-	*skb_mac_header(skb) = IPPROTO_ESP;
 
 	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+	esph->spi = x->id.spi;
 
-	aead_request_set_callback(req, 0, esp_output_done, skb);
-
-	/* For ESN we move the header forward by 4 bytes to
-	 * accomodate the high bits. We will move it back after
-	 * encryption.
-	 */
-	if ((x->props.flags & XFRM_STATE_ESN)) {
-		esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
-		*seqhi = esph->spi;
-		esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
-		aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+	tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
+	if (!tmp) {
+		err = -ENOMEM;
+		goto error;
 	}
 
-	esph->spi = x->id.spi;
+	seqhi = esp_tmp_seqhi(tmp);
+	iv = esp_tmp_iv(aead, tmp, seqhilen);
+	req = esp_tmp_req(aead, iv);
	sg = esp_req_sg(aead, req);
+	dsg = sg;
+
+	esph = esp_output_set_esn(skb, esph, seqhi);
 
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg,
 		     (unsigned char *)esph - skb->data,
 		     assoclen + ivlen + clen + alen);
 
-	aead_request_set_crypt(req, sg, sg, ivlen + clen, iv);
+skip_cow2:
+	if ((x->props.flags & XFRM_STATE_ESN))
+		aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+	else
+		aead_request_set_callback(req, 0, esp_output_done, skb);
+
+	aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv);
 	aead_request_set_ad(req, assoclen);
 
 	seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
@@ -278,6 +439,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 		esp_output_restore_header(skb);
 	}
 
+	if (sg != dsg)
+		esp_ssg_unref(x, tmp);
 	kfree(tmp);
 
 error:
@@ -343,6 +506,23 @@ static void esp_input_restore_header(struct sk_buff *skb)
 	__skb_pull(skb, 4);
 }
 
+static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
+{
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
+
+	/* For ESN we move the header forward by 4 bytes to
+	 * accomodate the high bits. We will move it back after
+	 * decryption.
+	 */
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		esph = (void *)skb_push(skb, 4);
+		*seqhi = esph->spi;
+		esph->spi = esph->seq_no;
+		esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+	}
+}
+
 static void esp_input_done_esn(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
@@ -378,14 +558,6 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		goto out;
 	}
 
-	nfrags = skb_cow_data(skb, 0, &trailer);
-	if (nfrags < 0) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ret = -ENOMEM;
-
 	assoclen = sizeof(*esph);
 	seqhilen = 0;
 
@@ -394,6 +566,27 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		assoclen += seqhilen;
 	}
 
+	if (!skb_cloned(skb)) {
+		if (!skb_is_nonlinear(skb)) {
+			nfrags = 1;
+
+			goto skip_cow;
+		} else if (!skb_has_frag_list(skb)) {
+			nfrags = skb_shinfo(skb)->nr_frags;
+			nfrags++;
+
+			goto skip_cow;
+		}
+	}
+
+	nfrags = skb_cow_data(skb, 0, &trailer);
+	if (nfrags < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+skip_cow:
+	ret = -ENOMEM;
 	tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
 	if (!tmp)
 		goto out;
@@ -404,26 +597,17 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	req = esp_tmp_req(aead, iv);
 	sg = esp_req_sg(aead, req);
 
-	skb->ip_summed = CHECKSUM_NONE;
+	esp_input_set_header(skb, seqhi);
 
-	esph = (struct ip_esp_hdr *)skb->data;
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg, 0, skb->len);
 
-	aead_request_set_callback(req, 0, esp_input_done, skb);
+	skb->ip_summed = CHECKSUM_NONE;
 
-	/* For ESN we move the header forward by 4 bytes to
-	 * accomodate the high bits. We will move it back after
-	 * decryption.
-	 */
-	if ((x->props.flags & XFRM_STATE_ESN)) {
-		esph = (void *)skb_push(skb, 4);
-		*seqhi = esph->spi;
-		esph->spi = esph->seq_no;
-		esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+	if ((x->props.flags & XFRM_STATE_ESN))
 		aead_request_set_callback(req, 0, esp_input_done_esn, skb);
-	}
-
-	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg, 0, skb->len);
+	else
+		aead_request_set_callback(req, 0, esp_input_done, skb);
 
 	aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
 	aead_request_set_ad(req, assoclen);
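For context, the decrypt-in-place fast path added to both esp_input() and esp6_input() above reduces to one frag-count decision; a sketch (not part of the patch; the helper name is hypothetical):

/* Returns the number of scatterlist entries needed to map the skb
 * directly, or -1 if skb_cow_data() must copy/linearize first.
 */
static int esp_input_nfrags_sketch(struct sk_buff *skb)
{
	if (skb_cloned(skb))
		return -1;		/* data shared with a clone: must copy */
	if (!skb_is_nonlinear(skb))
		return 1;		/* one sg entry covers skb->data */
	if (!skb_has_frag_list(skb))
		return skb_shinfo(skb)->nr_frags + 1;	/* head + page frags */
	return -1;			/* frag lists: fall back to the slow path */
}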
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 6e3f0254d8a1..3213fe8027be 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -21,6 +21,9 @@ static struct kmem_cache *secpath_cachep __read_mostly;
 static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
 static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO];
 
+static struct gro_cells gro_cells;
+static struct net_device xfrm_napi_dev;
+
 int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo)
 {
 	int err = 0;
@@ -371,7 +374,7 @@ resume:
 
 	if (decaps) {
 		skb_dst_drop(skb);
-		netif_rx(skb);
+		gro_cells_receive(&gro_cells, skb);
 		return 0;
 	} else {
 		return x->inner_mode->afinfo->transport_finish(skb, async);
@@ -394,6 +397,13 @@ EXPORT_SYMBOL(xfrm_input_resume);
 
 void __init xfrm_input_init(void)
 {
+	int err;
+
+	init_dummy_netdev(&xfrm_napi_dev);
+	err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
+	if (err)
+		gro_cells.cells = NULL;
+
 	secpath_cachep = kmem_cache_create("secpath_cache",
 					   sizeof(struct sec_path),
 					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
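For context, a minimal sketch (not part of the patch) of the gro_cells pattern adopted above: a dummy net_device anchors the per-cpu NAPI contexts, and gro_cells_receive() hands the decapsulated skb to GRO on the local cell. When the cells pointer is NULL, which is what xfrm_input_init() arranges on allocation failure, gro_cells_receive() falls back to netif_rx(), so the previous behaviour is preserved.

#include <net/gro_cells.h>

static struct gro_cells example_cells;
static struct net_device example_napi_dev;	/* dummy, never registered */

static int __init example_init(void)
{
	init_dummy_netdev(&example_napi_dev);
	return gro_cells_init(&example_cells, &example_napi_dev);
}

static void example_rx(struct sk_buff *skb)
{
	gro_cells_receive(&example_cells, skb);
}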
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 637387bbaaea..8ba29fe58352 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -246,10 +246,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu)
 		return;
 
 	afinfo = xfrm_state_get_afinfo(proto);
-	if (!afinfo)
-		return;
-
-	afinfo->local_error(skb, mtu);
-	xfrm_state_put_afinfo(afinfo);
+	if (afinfo)
+		afinfo->local_error(skb, mtu);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(xfrm_local_error);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 177e208e8ff5..99ad1af2927f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -330,7 +330,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
 }
 EXPORT_SYMBOL(xfrm_policy_destroy);
 
-/* Rule must be locked. Release descentant resources, announce
+/* Rule must be locked. Release descendant resources, announce
  * entry dead. The rule must be unlinked from lists to the moment.
  */
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 64e3c82eedf6..5a597dbbe564 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -192,7 +192,7 @@ int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
 	else
 		err = -EEXIST;
 	spin_unlock_bh(&xfrm_type_lock);
-	xfrm_state_put_afinfo(afinfo);
+	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(xfrm_register_type);
@@ -213,7 +213,7 @@ int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
 	else
 		typemap[type->proto] = NULL;
 	spin_unlock_bh(&xfrm_type_lock);
-	xfrm_state_put_afinfo(afinfo);
+	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(xfrm_unregister_type);
@@ -231,17 +231,18 @@ retry:
 		return NULL;
 	typemap = afinfo->type_map;
 
-	type = typemap[proto];
+	type = READ_ONCE(typemap[proto]);
 	if (unlikely(type && !try_module_get(type->owner)))
 		type = NULL;
+
+	rcu_read_unlock();
+
 	if (!type && !modload_attempted) {
-		xfrm_state_put_afinfo(afinfo);
 		request_module("xfrm-type-%d-%d", family, proto);
 		modload_attempted = 1;
 		goto retry;
 	}
 
-	xfrm_state_put_afinfo(afinfo);
 	return type;
 }
 
@@ -280,7 +281,7 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family)
 
 out:
 	spin_unlock_bh(&xfrm_mode_lock);
-	xfrm_state_put_afinfo(afinfo);
+	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(xfrm_register_mode);
@@ -308,7 +309,7 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
 	}
 
 	spin_unlock_bh(&xfrm_mode_lock);
-	xfrm_state_put_afinfo(afinfo);
+	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(xfrm_unregister_mode);
@@ -327,17 +328,17 @@ retry:
 	if (unlikely(afinfo == NULL))
 		return NULL;
 
-	mode = afinfo->mode_map[encap];
+	mode = READ_ONCE(afinfo->mode_map[encap]);
 	if (unlikely(mode && !try_module_get(mode->owner)))
 		mode = NULL;
+
+	rcu_read_unlock();
 	if (!mode && !modload_attempted) {
-		xfrm_state_put_afinfo(afinfo);
 		request_module("xfrm-mode-%d-%d", family, encap);
 		modload_attempted = 1;
 		goto retry;
 	}
 
-	xfrm_state_put_afinfo(afinfo);
 	return mode;
 }
 
@@ -409,7 +410,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
 		if (x->xflags & XFRM_SOFT_EXPIRE) {
 			/* enter hard expire without soft expire first?!
 			 * setting a new date could trigger this.
-			 * workarbound: fix x->curflt.add_time by below:
+			 * workaround: fix x->curflt.add_time by below:
 			 */
 			x->curlft.add_time = now - x->saved_tmo - 1;
 			tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
@@ -639,26 +640,25 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
 }
 EXPORT_SYMBOL(xfrm_sad_getinfo);
 
-static int
+static void
 xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
 		    const struct xfrm_tmpl *tmpl,
 		    const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		    unsigned short family)
 {
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family);
+
 	if (!afinfo)
-		return -1;
+		return;
+
 	afinfo->init_tempsel(&x->sel, fl);
 
 	if (family != tmpl->encap_family) {
-		xfrm_state_put_afinfo(afinfo);
-		afinfo = xfrm_state_get_afinfo(tmpl->encap_family);
+		afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family);
 		if (!afinfo)
-			return -1;
+			return;
 	}
 	afinfo->init_temprop(x, tmpl, daddr, saddr);
-	xfrm_state_put_afinfo(afinfo);
-	return 0;
 }
 
 static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
@@ -1474,7 +1474,7 @@ xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
 	if (afinfo->tmpl_sort)
 		err = afinfo->tmpl_sort(dst, src, n);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
+	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(xfrm_tmpl_sort);
@@ -1494,7 +1494,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
 	if (afinfo->state_sort)
 		err = afinfo->state_sort(dst, src, n);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
+	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_sort);
@@ -1932,10 +1932,10 @@ EXPORT_SYMBOL(xfrm_unregister_km);
 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
 {
 	int err = 0;
-	if (unlikely(afinfo == NULL))
-		return -EINVAL;
-	if (unlikely(afinfo->family >= NPROTO))
+
+	if (WARN_ON(afinfo->family >= NPROTO))
 		return -EAFNOSUPPORT;
+
 	spin_lock_bh(&xfrm_state_afinfo_lock);
 	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
 		err = -EEXIST;
@@ -1948,14 +1948,14 @@ EXPORT_SYMBOL(xfrm_state_register_afinfo);
 
 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 {
-	int err = 0;
-	if (unlikely(afinfo == NULL))
-		return -EINVAL;
-	if (unlikely(afinfo->family >= NPROTO))
+	int err = 0, family = afinfo->family;
+
+	if (WARN_ON(family >= NPROTO))
 		return -EAFNOSUPPORT;
+
 	spin_lock_bh(&xfrm_state_afinfo_lock);
 	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
-		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
+		if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo)
 			err = -EINVAL;
 		else
 			RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
@@ -1966,6 +1966,14 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 }
 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
 
+struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
+{
+	if (unlikely(family >= NPROTO))
+		return NULL;
+
+	return rcu_dereference(xfrm_state_afinfo[family]);
+}
+
 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
 {
 	struct xfrm_state_afinfo *afinfo;
@@ -1978,11 +1986,6 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
 	return afinfo;
 }
 
-void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
-{
-	rcu_read_unlock();
-}
-
 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
 void xfrm_state_delete_tunnel(struct xfrm_state *x)
 {
@@ -2000,16 +2003,13 @@ EXPORT_SYMBOL(xfrm_state_delete_tunnel);
 
 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 {
-	int res;
+	const struct xfrm_type *type = READ_ONCE(x->type);
 
-	spin_lock_bh(&x->lock);
 	if (x->km.state == XFRM_STATE_VALID &&
-	    x->type && x->type->get_mtu)
-		res = x->type->get_mtu(x, mtu);
-	else
-		res = mtu - x->props.header_len;
-	spin_unlock_bh(&x->lock);
-	return res;
+	    type && type->get_mtu)
+		return type->get_mtu(x, mtu);
+
+	return mtu - x->props.header_len;
 }
 
 int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
@@ -2028,7 +2028,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
 	if (afinfo->init_flags)
 		err = afinfo->init_flags(x);
 
-	xfrm_state_put_afinfo(afinfo);
+	rcu_read_unlock();
 
 	if (err)
 		goto error;
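Usage note on the new accessor (the sketch below is not part of the patch): xfrm_state_get_afinfo() still enters the RCU read-side section itself, which is why its callers above now end with a bare rcu_read_unlock() instead of xfrm_state_put_afinfo(); the new xfrm_state_afinfo_get_rcu() takes no lock and expects the caller to already be inside an RCU read-side section, as xfrm_init_tempstate() above assumes.

static void example_use_afinfo_rcu(unsigned int family)
{
	struct xfrm_state_afinfo *afinfo;

	rcu_read_lock();
	afinfo = xfrm_state_afinfo_get_rcu(family);
	if (afinfo) {
		/* afinfo is only guaranteed valid inside this section */
	}
	rcu_read_unlock();
}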