author    Linus Torvalds <torvalds@linux-foundation.org>    2014-12-10 19:10:49 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2014-12-10 19:10:49 -0500
commit    cbfe0de303a55ed96d8831c2d5f56f8131cd6612 (patch)
tree      b327762303c6a015421e4077e7c713b8a47a5e0e /mm
parent    8322b6fddfd2cee41a7732284e5f04750511f4b2 (diff)
parent    ba00410b8131b23edfb0e09f8b6dd26c8eb621fb (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull VFS changes from Al Viro:
 "First pile out of several (there _definitely_ will be more). Stuff in
  this one:

   - unification of d_splice_alias()/d_materialize_unique()

   - iov_iter rewrite

   - killing a bunch of ->f_path.dentry users (and f_dentry macro).

     Getting that completed will make life much simpler for
     unionmount/overlayfs, since then we'll be able to limit the places
     sensitive to file _dentry_ to reasonably few. Which allows to have
     file_inode(file) pointing to inode in a covered layer, with dentry
     pointing to (negative) dentry in union one.

     Still not complete, but much closer now.

   - crapectomy in lustre (dead code removal, mostly)

   - "let's make seq_printf return nothing" preparations

   - assorted cleanups and fixes

  There _definitely_ will be more piles"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (63 commits)
  copy_from_iter_nocache()
  new helper: iov_iter_kvec()
  csum_and_copy_..._iter()
  iov_iter.c: handle ITER_KVEC directly
  iov_iter.c: convert copy_to_iter() to iterate_and_advance
  iov_iter.c: convert copy_from_iter() to iterate_and_advance
  iov_iter.c: get rid of bvec_copy_page_{to,from}_iter()
  iov_iter.c: convert iov_iter_zero() to iterate_and_advance
  iov_iter.c: convert iov_iter_get_pages_alloc() to iterate_all_kinds
  iov_iter.c: convert iov_iter_get_pages() to iterate_all_kinds
  iov_iter.c: convert iov_iter_npages() to iterate_all_kinds
  iov_iter.c: iterate_and_advance
  iov_iter.c: macros for iterating over iov_iter
  kill f_dentry macro
  dcache: fix kmemcheck warning in switch_names
  new helper: audit_file()
  nfsd_vfs_write(): use file_inode()
  ncpfs: use file_inode()
  kill f_dentry uses
  lockd: get rid of ->f_path.dentry->d_sb
  ...
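The ->f_path.dentry work called out above is largely mechanical: callers that only need the inode move to file_inode(file), and the rest spell out file->f_path.dentry, exactly as the mm/memcontrol.c hunk at the bottom of this diff does. A minimal sketch of the pattern, with hypothetical helper names for illustration (only the two accessors are from the real API):

    #include <linux/fs.h>

    /* Hypothetical helpers showing the f_dentry -> f_path.dentry /
     * file_inode() conversion pattern described in the message above. */
    static const unsigned char *example_file_name(struct file *file)
    {
            /* was: file->f_dentry->d_name.name */
            return file->f_path.dentry->d_name.name;
    }

    static loff_t example_file_size(struct file *file)
    {
            /* Prefer file_inode() when only the inode matters, so the
             * dentry can later point into a union layer while the inode
             * stays in the covered layer. */
            return i_size_read(file_inode(file));
    }

Keeping the dentry out of places that only care about the inode is what allows file_inode(file) and the file's dentry to live in different layers, as the message above explains.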
Diffstat (limited to 'mm')
-rw-r--r--   mm/iov_iter.c     1058
-rw-r--r--   mm/memcontrol.c      4
2 files changed, 422 insertions, 640 deletions
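Most of the iov_iter.c churn below comes from routing the old per-type copy helpers through the new iterate_all_kinds()/iterate_and_advance() macros and adding ITER_KVEC iterators for kernel buffers. A rough sketch of how a caller might use the reworked interface; the function and buffer names are made up for illustration:

    #include <linux/uio.h>

    /* Illustrative only: wrap a kernel buffer in an ITER_KVEC iterator
     * and fill it with copy_to_iter(), two helpers added/reworked here. */
    static size_t example_fill_kernel_buf(void *buf, size_t len, void *src)
    {
            struct kvec kv = { .iov_base = buf, .iov_len = len };
            struct iov_iter iter;

            /* READ: the iterator is the destination of the copy. */
            iov_iter_kvec(&iter, READ | ITER_KVEC, &kv, 1, len);

            /* copy_to_iter() advances the iterator and returns the number
             * of bytes copied; the same call handles user iovecs and
             * bio_vecs as well. */
            return copy_to_iter(src, len, &iter);
    }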
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index e34a3cb6aad6..a1599ca4ab0e 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -3,95 +3,136 @@
3#include <linux/pagemap.h> 3#include <linux/pagemap.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/vmalloc.h> 5#include <linux/vmalloc.h>
6 6#include <net/checksum.h>
7static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i) 7
8{ 8#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
9 size_t skip, copy, left, wanted; 9 size_t left; \
10 const struct iovec *iov; 10 size_t wanted = n; \
11 char __user *buf; 11 __p = i->iov; \
12 12 __v.iov_len = min(n, __p->iov_len - skip); \
13 if (unlikely(bytes > i->count)) 13 if (likely(__v.iov_len)) { \
14 bytes = i->count; 14 __v.iov_base = __p->iov_base + skip; \
15 15 left = (STEP); \
16 if (unlikely(!bytes)) 16 __v.iov_len -= left; \
17 return 0; 17 skip += __v.iov_len; \
18 18 n -= __v.iov_len; \
19 wanted = bytes; 19 } else { \
20 iov = i->iov; 20 left = 0; \
21 skip = i->iov_offset; 21 } \
22 buf = iov->iov_base + skip; 22 while (unlikely(!left && n)) { \
23 copy = min(bytes, iov->iov_len - skip); 23 __p++; \
24 24 __v.iov_len = min(n, __p->iov_len); \
25 left = __copy_to_user(buf, from, copy); 25 if (unlikely(!__v.iov_len)) \
26 copy -= left; 26 continue; \
27 skip += copy; 27 __v.iov_base = __p->iov_base; \
28 from += copy; 28 left = (STEP); \
29 bytes -= copy; 29 __v.iov_len -= left; \
30 while (unlikely(!left && bytes)) { 30 skip = __v.iov_len; \
31 iov++; 31 n -= __v.iov_len; \
32 buf = iov->iov_base; 32 } \
33 copy = min(bytes, iov->iov_len); 33 n = wanted - n; \
34 left = __copy_to_user(buf, from, copy); 34}
35 copy -= left; 35
36 skip = copy; 36#define iterate_kvec(i, n, __v, __p, skip, STEP) { \
37 from += copy; 37 size_t wanted = n; \
38 bytes -= copy; 38 __p = i->kvec; \
39 } 39 __v.iov_len = min(n, __p->iov_len - skip); \
40 40 if (likely(__v.iov_len)) { \
41 if (skip == iov->iov_len) { 41 __v.iov_base = __p->iov_base + skip; \
42 iov++; 42 (void)(STEP); \
43 skip = 0; 43 skip += __v.iov_len; \
44 } 44 n -= __v.iov_len; \
45 i->count -= wanted - bytes; 45 } \
46 i->nr_segs -= iov - i->iov; 46 while (unlikely(n)) { \
47 i->iov = iov; 47 __p++; \
48 i->iov_offset = skip; 48 __v.iov_len = min(n, __p->iov_len); \
49 return wanted - bytes; 49 if (unlikely(!__v.iov_len)) \
50} 50 continue; \
51 51 __v.iov_base = __p->iov_base; \
52static size_t copy_from_iter_iovec(void *to, size_t bytes, struct iov_iter *i) 52 (void)(STEP); \
53{ 53 skip = __v.iov_len; \
54 size_t skip, copy, left, wanted; 54 n -= __v.iov_len; \
55 const struct iovec *iov; 55 } \
56 char __user *buf; 56 n = wanted; \
57 57}
58 if (unlikely(bytes > i->count)) 58
59 bytes = i->count; 59#define iterate_bvec(i, n, __v, __p, skip, STEP) { \
60 60 size_t wanted = n; \
61 if (unlikely(!bytes)) 61 __p = i->bvec; \
62 return 0; 62 __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \
63 63 if (likely(__v.bv_len)) { \
64 wanted = bytes; 64 __v.bv_page = __p->bv_page; \
65 iov = i->iov; 65 __v.bv_offset = __p->bv_offset + skip; \
66 skip = i->iov_offset; 66 (void)(STEP); \
67 buf = iov->iov_base + skip; 67 skip += __v.bv_len; \
68 copy = min(bytes, iov->iov_len - skip); 68 n -= __v.bv_len; \
69 69 } \
70 left = __copy_from_user(to, buf, copy); 70 while (unlikely(n)) { \
71 copy -= left; 71 __p++; \
72 skip += copy; 72 __v.bv_len = min_t(size_t, n, __p->bv_len); \
73 to += copy; 73 if (unlikely(!__v.bv_len)) \
74 bytes -= copy; 74 continue; \
75 while (unlikely(!left && bytes)) { 75 __v.bv_page = __p->bv_page; \
76 iov++; 76 __v.bv_offset = __p->bv_offset; \
77 buf = iov->iov_base; 77 (void)(STEP); \
78 copy = min(bytes, iov->iov_len); 78 skip = __v.bv_len; \
79 left = __copy_from_user(to, buf, copy); 79 n -= __v.bv_len; \
80 copy -= left; 80 } \
81 skip = copy; 81 n = wanted; \
82 to += copy; 82}
83 bytes -= copy; 83
84 } 84#define iterate_all_kinds(i, n, v, I, B, K) { \
85 85 size_t skip = i->iov_offset; \
86 if (skip == iov->iov_len) { 86 if (unlikely(i->type & ITER_BVEC)) { \
87 iov++; 87 const struct bio_vec *bvec; \
88 skip = 0; 88 struct bio_vec v; \
89 } 89 iterate_bvec(i, n, v, bvec, skip, (B)) \
90 i->count -= wanted - bytes; 90 } else if (unlikely(i->type & ITER_KVEC)) { \
91 i->nr_segs -= iov - i->iov; 91 const struct kvec *kvec; \
92 i->iov = iov; 92 struct kvec v; \
93 i->iov_offset = skip; 93 iterate_kvec(i, n, v, kvec, skip, (K)) \
94 return wanted - bytes; 94 } else { \
95 const struct iovec *iov; \
96 struct iovec v; \
97 iterate_iovec(i, n, v, iov, skip, (I)) \
98 } \
99}
100
101#define iterate_and_advance(i, n, v, I, B, K) { \
102 size_t skip = i->iov_offset; \
103 if (unlikely(i->type & ITER_BVEC)) { \
104 const struct bio_vec *bvec; \
105 struct bio_vec v; \
106 iterate_bvec(i, n, v, bvec, skip, (B)) \
107 if (skip == bvec->bv_len) { \
108 bvec++; \
109 skip = 0; \
110 } \
111 i->nr_segs -= bvec - i->bvec; \
112 i->bvec = bvec; \
113 } else if (unlikely(i->type & ITER_KVEC)) { \
114 const struct kvec *kvec; \
115 struct kvec v; \
116 iterate_kvec(i, n, v, kvec, skip, (K)) \
117 if (skip == kvec->iov_len) { \
118 kvec++; \
119 skip = 0; \
120 } \
121 i->nr_segs -= kvec - i->kvec; \
122 i->kvec = kvec; \
123 } else { \
124 const struct iovec *iov; \
125 struct iovec v; \
126 iterate_iovec(i, n, v, iov, skip, (I)) \
127 if (skip == iov->iov_len) { \
128 iov++; \
129 skip = 0; \
130 } \
131 i->nr_segs -= iov - i->iov; \
132 i->iov = iov; \
133 } \
134 i->count -= n; \
135 i->iov_offset = skip; \
95} 136}
96 137
97static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, 138static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
@@ -256,134 +297,6 @@ done:
256 return wanted - bytes; 297 return wanted - bytes;
257} 298}
258 299
259static size_t zero_iovec(size_t bytes, struct iov_iter *i)
260{
261 size_t skip, copy, left, wanted;
262 const struct iovec *iov;
263 char __user *buf;
264
265 if (unlikely(bytes > i->count))
266 bytes = i->count;
267
268 if (unlikely(!bytes))
269 return 0;
270
271 wanted = bytes;
272 iov = i->iov;
273 skip = i->iov_offset;
274 buf = iov->iov_base + skip;
275 copy = min(bytes, iov->iov_len - skip);
276
277 left = __clear_user(buf, copy);
278 copy -= left;
279 skip += copy;
280 bytes -= copy;
281
282 while (unlikely(!left && bytes)) {
283 iov++;
284 buf = iov->iov_base;
285 copy = min(bytes, iov->iov_len);
286 left = __clear_user(buf, copy);
287 copy -= left;
288 skip = copy;
289 bytes -= copy;
290 }
291
292 if (skip == iov->iov_len) {
293 iov++;
294 skip = 0;
295 }
296 i->count -= wanted - bytes;
297 i->nr_segs -= iov - i->iov;
298 i->iov = iov;
299 i->iov_offset = skip;
300 return wanted - bytes;
301}
302
303static size_t __iovec_copy_from_user_inatomic(char *vaddr,
304 const struct iovec *iov, size_t base, size_t bytes)
305{
306 size_t copied = 0, left = 0;
307
308 while (bytes) {
309 char __user *buf = iov->iov_base + base;
310 int copy = min(bytes, iov->iov_len - base);
311
312 base = 0;
313 left = __copy_from_user_inatomic(vaddr, buf, copy);
314 copied += copy;
315 bytes -= copy;
316 vaddr += copy;
317 iov++;
318
319 if (unlikely(left))
320 break;
321 }
322 return copied - left;
323}
324
325/*
326 * Copy as much as we can into the page and return the number of bytes which
327 * were successfully copied. If a fault is encountered then return the number of
328 * bytes which were copied.
329 */
330static size_t copy_from_user_atomic_iovec(struct page *page,
331 struct iov_iter *i, unsigned long offset, size_t bytes)
332{
333 char *kaddr;
334 size_t copied;
335
336 kaddr = kmap_atomic(page);
337 if (likely(i->nr_segs == 1)) {
338 int left;
339 char __user *buf = i->iov->iov_base + i->iov_offset;
340 left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
341 copied = bytes - left;
342 } else {
343 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
344 i->iov, i->iov_offset, bytes);
345 }
346 kunmap_atomic(kaddr);
347
348 return copied;
349}
350
351static void advance_iovec(struct iov_iter *i, size_t bytes)
352{
353 BUG_ON(i->count < bytes);
354
355 if (likely(i->nr_segs == 1)) {
356 i->iov_offset += bytes;
357 i->count -= bytes;
358 } else {
359 const struct iovec *iov = i->iov;
360 size_t base = i->iov_offset;
361 unsigned long nr_segs = i->nr_segs;
362
363 /*
364 * The !iov->iov_len check ensures we skip over unlikely
365 * zero-length segments (without overruning the iovec).
366 */
367 while (bytes || unlikely(i->count && !iov->iov_len)) {
368 int copy;
369
370 copy = min(bytes, iov->iov_len - base);
371 BUG_ON(!i->count || i->count < copy);
372 i->count -= copy;
373 bytes -= copy;
374 base += copy;
375 if (iov->iov_len == base) {
376 iov++;
377 nr_segs--;
378 base = 0;
379 }
380 }
381 i->iov = iov;
382 i->iov_offset = base;
383 i->nr_segs = nr_segs;
384 }
385}
386
387/* 300/*
388 * Fault in the first iovec of the given iov_iter, to a maximum length 301 * Fault in the first iovec of the given iov_iter, to a maximum length
389 * of bytes. Returns 0 on success, or non-zero if the memory could not be 302 * of bytes. Returns 0 on success, or non-zero if the memory could not be
@@ -395,7 +308,7 @@ static void advance_iovec(struct iov_iter *i, size_t bytes)
  */
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
-	if (!(i->type & ITER_BVEC)) {
+	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
 		char __user *buf = i->iov->iov_base + i->iov_offset;
 		bytes = min(bytes, i->iov->iov_len - i->iov_offset);
 		return fault_in_pages_readable(buf, bytes);
@@ -404,136 +317,25 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
404} 317}
405EXPORT_SYMBOL(iov_iter_fault_in_readable); 318EXPORT_SYMBOL(iov_iter_fault_in_readable);
406 319
407static unsigned long alignment_iovec(const struct iov_iter *i)
408{
409 const struct iovec *iov = i->iov;
410 unsigned long res;
411 size_t size = i->count;
412 size_t n;
413
414 if (!size)
415 return 0;
416
417 res = (unsigned long)iov->iov_base + i->iov_offset;
418 n = iov->iov_len - i->iov_offset;
419 if (n >= size)
420 return res | size;
421 size -= n;
422 res |= n;
423 while (size > (++iov)->iov_len) {
424 res |= (unsigned long)iov->iov_base | iov->iov_len;
425 size -= iov->iov_len;
426 }
427 res |= (unsigned long)iov->iov_base | size;
428 return res;
429}
430
431void iov_iter_init(struct iov_iter *i, int direction, 320void iov_iter_init(struct iov_iter *i, int direction,
432 const struct iovec *iov, unsigned long nr_segs, 321 const struct iovec *iov, unsigned long nr_segs,
433 size_t count) 322 size_t count)
434{ 323{
435 /* It will get better. Eventually... */ 324 /* It will get better. Eventually... */
436 if (segment_eq(get_fs(), KERNEL_DS)) 325 if (segment_eq(get_fs(), KERNEL_DS)) {
437 direction |= ITER_KVEC; 326 direction |= ITER_KVEC;
438 i->type = direction; 327 i->type = direction;
439 i->iov = iov; 328 i->kvec = (struct kvec *)iov;
329 } else {
330 i->type = direction;
331 i->iov = iov;
332 }
440 i->nr_segs = nr_segs; 333 i->nr_segs = nr_segs;
441 i->iov_offset = 0; 334 i->iov_offset = 0;
442 i->count = count; 335 i->count = count;
443} 336}
444EXPORT_SYMBOL(iov_iter_init); 337EXPORT_SYMBOL(iov_iter_init);
445 338
446static ssize_t get_pages_iovec(struct iov_iter *i,
447 struct page **pages, size_t maxsize, unsigned maxpages,
448 size_t *start)
449{
450 size_t offset = i->iov_offset;
451 const struct iovec *iov = i->iov;
452 size_t len;
453 unsigned long addr;
454 int n;
455 int res;
456
457 len = iov->iov_len - offset;
458 if (len > i->count)
459 len = i->count;
460 if (len > maxsize)
461 len = maxsize;
462 addr = (unsigned long)iov->iov_base + offset;
463 len += *start = addr & (PAGE_SIZE - 1);
464 if (len > maxpages * PAGE_SIZE)
465 len = maxpages * PAGE_SIZE;
466 addr &= ~(PAGE_SIZE - 1);
467 n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
468 res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
469 if (unlikely(res < 0))
470 return res;
471 return (res == n ? len : res * PAGE_SIZE) - *start;
472}
473
474static ssize_t get_pages_alloc_iovec(struct iov_iter *i,
475 struct page ***pages, size_t maxsize,
476 size_t *start)
477{
478 size_t offset = i->iov_offset;
479 const struct iovec *iov = i->iov;
480 size_t len;
481 unsigned long addr;
482 void *p;
483 int n;
484 int res;
485
486 len = iov->iov_len - offset;
487 if (len > i->count)
488 len = i->count;
489 if (len > maxsize)
490 len = maxsize;
491 addr = (unsigned long)iov->iov_base + offset;
492 len += *start = addr & (PAGE_SIZE - 1);
493 addr &= ~(PAGE_SIZE - 1);
494 n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
495
496 p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
497 if (!p)
498 p = vmalloc(n * sizeof(struct page *));
499 if (!p)
500 return -ENOMEM;
501
502 res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
503 if (unlikely(res < 0)) {
504 kvfree(p);
505 return res;
506 }
507 *pages = p;
508 return (res == n ? len : res * PAGE_SIZE) - *start;
509}
510
511static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages)
512{
513 size_t offset = i->iov_offset;
514 size_t size = i->count;
515 const struct iovec *iov = i->iov;
516 int npages = 0;
517 int n;
518
519 for (n = 0; size && n < i->nr_segs; n++, iov++) {
520 unsigned long addr = (unsigned long)iov->iov_base + offset;
521 size_t len = iov->iov_len - offset;
522 offset = 0;
523 if (unlikely(!len)) /* empty segment */
524 continue;
525 if (len > size)
526 len = size;
527 npages += (addr + len + PAGE_SIZE - 1) / PAGE_SIZE
528 - addr / PAGE_SIZE;
529 if (npages >= maxpages) /* don't bother going further */
530 return maxpages;
531 size -= len;
532 offset = 0;
533 }
534 return min(npages, maxpages);
535}
536
537static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) 339static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
538{ 340{
539 char *from = kmap_atomic(page); 341 char *from = kmap_atomic(page);
@@ -555,293 +357,78 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
555 kunmap_atomic(addr); 357 kunmap_atomic(addr);
556} 358}
557 359
558static size_t copy_to_iter_bvec(void *from, size_t bytes, struct iov_iter *i) 360size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
559{ 361{
560 size_t skip, copy, wanted; 362 char *from = addr;
561 const struct bio_vec *bvec;
562
563 if (unlikely(bytes > i->count)) 363 if (unlikely(bytes > i->count))
564 bytes = i->count; 364 bytes = i->count;
565 365
566 if (unlikely(!bytes)) 366 if (unlikely(!bytes))
567 return 0; 367 return 0;
568 368
569 wanted = bytes; 369 iterate_and_advance(i, bytes, v,
570 bvec = i->bvec; 370 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
571 skip = i->iov_offset; 371 v.iov_len),
572 copy = min_t(size_t, bytes, bvec->bv_len - skip); 372 memcpy_to_page(v.bv_page, v.bv_offset,
373 (from += v.bv_len) - v.bv_len, v.bv_len),
374 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
375 )
573 376
574 memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy); 377 return bytes;
575 skip += copy;
576 from += copy;
577 bytes -= copy;
578 while (bytes) {
579 bvec++;
580 copy = min(bytes, (size_t)bvec->bv_len);
581 memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, copy);
582 skip = copy;
583 from += copy;
584 bytes -= copy;
585 }
586 if (skip == bvec->bv_len) {
587 bvec++;
588 skip = 0;
589 }
590 i->count -= wanted - bytes;
591 i->nr_segs -= bvec - i->bvec;
592 i->bvec = bvec;
593 i->iov_offset = skip;
594 return wanted - bytes;
595} 378}
379EXPORT_SYMBOL(copy_to_iter);
596 380
597static size_t copy_from_iter_bvec(void *to, size_t bytes, struct iov_iter *i) 381size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
598{ 382{
599 size_t skip, copy, wanted; 383 char *to = addr;
600 const struct bio_vec *bvec;
601
602 if (unlikely(bytes > i->count)) 384 if (unlikely(bytes > i->count))
603 bytes = i->count; 385 bytes = i->count;
604 386
605 if (unlikely(!bytes)) 387 if (unlikely(!bytes))
606 return 0; 388 return 0;
607 389
608 wanted = bytes; 390 iterate_and_advance(i, bytes, v,
609 bvec = i->bvec; 391 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
610 skip = i->iov_offset; 392 v.iov_len),
611 393 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
612 copy = min(bytes, bvec->bv_len - skip); 394 v.bv_offset, v.bv_len),
395 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
396 )
613 397
614 memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy); 398 return bytes;
615
616 to += copy;
617 skip += copy;
618 bytes -= copy;
619
620 while (bytes) {
621 bvec++;
622 copy = min(bytes, (size_t)bvec->bv_len);
623 memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, copy);
624 skip = copy;
625 to += copy;
626 bytes -= copy;
627 }
628 if (skip == bvec->bv_len) {
629 bvec++;
630 skip = 0;
631 }
632 i->count -= wanted;
633 i->nr_segs -= bvec - i->bvec;
634 i->bvec = bvec;
635 i->iov_offset = skip;
636 return wanted;
637}
638
639static size_t copy_page_to_iter_bvec(struct page *page, size_t offset,
640 size_t bytes, struct iov_iter *i)
641{
642 void *kaddr = kmap_atomic(page);
643 size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i);
644 kunmap_atomic(kaddr);
645 return wanted;
646}
647
648static size_t copy_page_from_iter_bvec(struct page *page, size_t offset,
649 size_t bytes, struct iov_iter *i)
650{
651 void *kaddr = kmap_atomic(page);
652 size_t wanted = copy_from_iter_bvec(kaddr + offset, bytes, i);
653 kunmap_atomic(kaddr);
654 return wanted;
655} 399}
400EXPORT_SYMBOL(copy_from_iter);
656 401
657static size_t zero_bvec(size_t bytes, struct iov_iter *i) 402size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
658{ 403{
659 size_t skip, copy, wanted; 404 char *to = addr;
660 const struct bio_vec *bvec;
661
662 if (unlikely(bytes > i->count)) 405 if (unlikely(bytes > i->count))
663 bytes = i->count; 406 bytes = i->count;
664 407
665 if (unlikely(!bytes)) 408 if (unlikely(!bytes))
666 return 0; 409 return 0;
667 410
668 wanted = bytes; 411 iterate_and_advance(i, bytes, v,
669 bvec = i->bvec; 412 __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
670 skip = i->iov_offset; 413 v.iov_base, v.iov_len),
671 copy = min_t(size_t, bytes, bvec->bv_len - skip); 414 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
672 415 v.bv_offset, v.bv_len),
673 memzero_page(bvec->bv_page, skip + bvec->bv_offset, copy); 416 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
674 skip += copy; 417 )
675 bytes -= copy;
676 while (bytes) {
677 bvec++;
678 copy = min(bytes, (size_t)bvec->bv_len);
679 memzero_page(bvec->bv_page, bvec->bv_offset, copy);
680 skip = copy;
681 bytes -= copy;
682 }
683 if (skip == bvec->bv_len) {
684 bvec++;
685 skip = 0;
686 }
687 i->count -= wanted - bytes;
688 i->nr_segs -= bvec - i->bvec;
689 i->bvec = bvec;
690 i->iov_offset = skip;
691 return wanted - bytes;
692}
693 418
694static size_t copy_from_user_bvec(struct page *page,
695 struct iov_iter *i, unsigned long offset, size_t bytes)
696{
697 char *kaddr;
698 size_t left;
699 const struct bio_vec *bvec;
700 size_t base = i->iov_offset;
701
702 kaddr = kmap_atomic(page);
703 for (left = bytes, bvec = i->bvec; left; bvec++, base = 0) {
704 size_t copy = min(left, bvec->bv_len - base);
705 if (!bvec->bv_len)
706 continue;
707 memcpy_from_page(kaddr + offset, bvec->bv_page,
708 bvec->bv_offset + base, copy);
709 offset += copy;
710 left -= copy;
711 }
712 kunmap_atomic(kaddr);
713 return bytes; 419 return bytes;
714} 420}
715 421EXPORT_SYMBOL(copy_from_iter_nocache);
716static void advance_bvec(struct iov_iter *i, size_t bytes)
717{
718 BUG_ON(i->count < bytes);
719
720 if (likely(i->nr_segs == 1)) {
721 i->iov_offset += bytes;
722 i->count -= bytes;
723 } else {
724 const struct bio_vec *bvec = i->bvec;
725 size_t base = i->iov_offset;
726 unsigned long nr_segs = i->nr_segs;
727
728 /*
729 * The !iov->iov_len check ensures we skip over unlikely
730 * zero-length segments (without overruning the iovec).
731 */
732 while (bytes || unlikely(i->count && !bvec->bv_len)) {
733 int copy;
734
735 copy = min(bytes, bvec->bv_len - base);
736 BUG_ON(!i->count || i->count < copy);
737 i->count -= copy;
738 bytes -= copy;
739 base += copy;
740 if (bvec->bv_len == base) {
741 bvec++;
742 nr_segs--;
743 base = 0;
744 }
745 }
746 i->bvec = bvec;
747 i->iov_offset = base;
748 i->nr_segs = nr_segs;
749 }
750}
751
752static unsigned long alignment_bvec(const struct iov_iter *i)
753{
754 const struct bio_vec *bvec = i->bvec;
755 unsigned long res;
756 size_t size = i->count;
757 size_t n;
758
759 if (!size)
760 return 0;
761
762 res = bvec->bv_offset + i->iov_offset;
763 n = bvec->bv_len - i->iov_offset;
764 if (n >= size)
765 return res | size;
766 size -= n;
767 res |= n;
768 while (size > (++bvec)->bv_len) {
769 res |= bvec->bv_offset | bvec->bv_len;
770 size -= bvec->bv_len;
771 }
772 res |= bvec->bv_offset | size;
773 return res;
774}
775
776static ssize_t get_pages_bvec(struct iov_iter *i,
777 struct page **pages, size_t maxsize, unsigned maxpages,
778 size_t *start)
779{
780 const struct bio_vec *bvec = i->bvec;
781 size_t len = bvec->bv_len - i->iov_offset;
782 if (len > i->count)
783 len = i->count;
784 if (len > maxsize)
785 len = maxsize;
786 /* can't be more than PAGE_SIZE */
787 *start = bvec->bv_offset + i->iov_offset;
788
789 get_page(*pages = bvec->bv_page);
790
791 return len;
792}
793
794static ssize_t get_pages_alloc_bvec(struct iov_iter *i,
795 struct page ***pages, size_t maxsize,
796 size_t *start)
797{
798 const struct bio_vec *bvec = i->bvec;
799 size_t len = bvec->bv_len - i->iov_offset;
800 if (len > i->count)
801 len = i->count;
802 if (len > maxsize)
803 len = maxsize;
804 *start = bvec->bv_offset + i->iov_offset;
805
806 *pages = kmalloc(sizeof(struct page *), GFP_KERNEL);
807 if (!*pages)
808 return -ENOMEM;
809
810 get_page(**pages = bvec->bv_page);
811
812 return len;
813}
814
815static int iov_iter_npages_bvec(const struct iov_iter *i, int maxpages)
816{
817 size_t offset = i->iov_offset;
818 size_t size = i->count;
819 const struct bio_vec *bvec = i->bvec;
820 int npages = 0;
821 int n;
822
823 for (n = 0; size && n < i->nr_segs; n++, bvec++) {
824 size_t len = bvec->bv_len - offset;
825 offset = 0;
826 if (unlikely(!len)) /* empty segment */
827 continue;
828 if (len > size)
829 len = size;
830 npages++;
831 if (npages >= maxpages) /* don't bother going further */
832 return maxpages;
833 size -= len;
834 offset = 0;
835 }
836 return min(npages, maxpages);
837}
838 422
839size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, 423size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
840 struct iov_iter *i) 424 struct iov_iter *i)
841{ 425{
842 if (i->type & ITER_BVEC) 426 if (i->type & (ITER_BVEC|ITER_KVEC)) {
843 return copy_page_to_iter_bvec(page, offset, bytes, i); 427 void *kaddr = kmap_atomic(page);
844 else 428 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
429 kunmap_atomic(kaddr);
430 return wanted;
431 } else
845 return copy_page_to_iter_iovec(page, offset, bytes, i); 432 return copy_page_to_iter_iovec(page, offset, bytes, i);
846} 433}
847EXPORT_SYMBOL(copy_page_to_iter); 434EXPORT_SYMBOL(copy_page_to_iter);
@@ -849,57 +436,53 @@ EXPORT_SYMBOL(copy_page_to_iter);
849size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, 436size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
850 struct iov_iter *i) 437 struct iov_iter *i)
851{ 438{
852 if (i->type & ITER_BVEC) 439 if (i->type & (ITER_BVEC|ITER_KVEC)) {
853 return copy_page_from_iter_bvec(page, offset, bytes, i); 440 void *kaddr = kmap_atomic(page);
854 else 441 size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
442 kunmap_atomic(kaddr);
443 return wanted;
444 } else
855 return copy_page_from_iter_iovec(page, offset, bytes, i); 445 return copy_page_from_iter_iovec(page, offset, bytes, i);
856} 446}
857EXPORT_SYMBOL(copy_page_from_iter); 447EXPORT_SYMBOL(copy_page_from_iter);
858 448
859size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i) 449size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
860{ 450{
861 if (i->type & ITER_BVEC) 451 if (unlikely(bytes > i->count))
862 return copy_to_iter_bvec(addr, bytes, i); 452 bytes = i->count;
863 else
864 return copy_to_iter_iovec(addr, bytes, i);
865}
866EXPORT_SYMBOL(copy_to_iter);
867 453
868size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) 454 if (unlikely(!bytes))
869{ 455 return 0;
870 if (i->type & ITER_BVEC)
871 return copy_from_iter_bvec(addr, bytes, i);
872 else
873 return copy_from_iter_iovec(addr, bytes, i);
874}
875EXPORT_SYMBOL(copy_from_iter);
876 456
877size_t iov_iter_zero(size_t bytes, struct iov_iter *i) 457 iterate_and_advance(i, bytes, v,
878{ 458 __clear_user(v.iov_base, v.iov_len),
879 if (i->type & ITER_BVEC) { 459 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
880 return zero_bvec(bytes, i); 460 memset(v.iov_base, 0, v.iov_len)
881 } else { 461 )
882 return zero_iovec(bytes, i); 462
883 } 463 return bytes;
884} 464}
885EXPORT_SYMBOL(iov_iter_zero); 465EXPORT_SYMBOL(iov_iter_zero);
886 466
887size_t iov_iter_copy_from_user_atomic(struct page *page, 467size_t iov_iter_copy_from_user_atomic(struct page *page,
888 struct iov_iter *i, unsigned long offset, size_t bytes) 468 struct iov_iter *i, unsigned long offset, size_t bytes)
889{ 469{
890 if (i->type & ITER_BVEC) 470 char *kaddr = kmap_atomic(page), *p = kaddr + offset;
891 return copy_from_user_bvec(page, i, offset, bytes); 471 iterate_all_kinds(i, bytes, v,
892 else 472 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
893 return copy_from_user_atomic_iovec(page, i, offset, bytes); 473 v.iov_base, v.iov_len),
474 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
475 v.bv_offset, v.bv_len),
476 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
477 )
478 kunmap_atomic(kaddr);
479 return bytes;
894} 480}
895EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); 481EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
896 482
897void iov_iter_advance(struct iov_iter *i, size_t size) 483void iov_iter_advance(struct iov_iter *i, size_t size)
898{ 484{
899 if (i->type & ITER_BVEC) 485 iterate_and_advance(i, size, v, 0, 0, 0)
900 advance_bvec(i, size);
901 else
902 advance_iovec(i, size);
903} 486}
904EXPORT_SYMBOL(iov_iter_advance); 487EXPORT_SYMBOL(iov_iter_advance);
905 488
@@ -917,12 +500,33 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
 
+void iov_iter_kvec(struct iov_iter *i, int direction,
+			const struct kvec *iov, unsigned long nr_segs,
+			size_t count)
+{
+	BUG_ON(!(direction & ITER_KVEC));
+	i->type = direction;
+	i->kvec = (struct kvec *)iov;
+	i->nr_segs = nr_segs;
+	i->iov_offset = 0;
+	i->count = count;
+}
+EXPORT_SYMBOL(iov_iter_kvec);
+
 unsigned long iov_iter_alignment(const struct iov_iter *i)
 {
-	if (i->type & ITER_BVEC)
-		return alignment_bvec(i);
-	else
-		return alignment_iovec(i);
+	unsigned long res = 0;
+	size_t size = i->count;
+
+	if (!size)
+		return 0;
+
+	iterate_all_kinds(i, size, v,
+		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
+		res |= v.bv_offset | v.bv_len,
+		res |= (unsigned long)v.iov_base | v.iov_len
+	)
+	return res;
 }
 EXPORT_SYMBOL(iov_iter_alignment);
 
@@ -930,29 +534,207 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
930 struct page **pages, size_t maxsize, unsigned maxpages, 534 struct page **pages, size_t maxsize, unsigned maxpages,
931 size_t *start) 535 size_t *start)
932{ 536{
933 if (i->type & ITER_BVEC) 537 if (maxsize > i->count)
934 return get_pages_bvec(i, pages, maxsize, maxpages, start); 538 maxsize = i->count;
935 else 539
936 return get_pages_iovec(i, pages, maxsize, maxpages, start); 540 if (!maxsize)
541 return 0;
542
543 iterate_all_kinds(i, maxsize, v, ({
544 unsigned long addr = (unsigned long)v.iov_base;
545 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
546 int n;
547 int res;
548
549 if (len > maxpages * PAGE_SIZE)
550 len = maxpages * PAGE_SIZE;
551 addr &= ~(PAGE_SIZE - 1);
552 n = DIV_ROUND_UP(len, PAGE_SIZE);
553 res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
554 if (unlikely(res < 0))
555 return res;
556 return (res == n ? len : res * PAGE_SIZE) - *start;
557 0;}),({
558 /* can't be more than PAGE_SIZE */
559 *start = v.bv_offset;
560 get_page(*pages = v.bv_page);
561 return v.bv_len;
562 }),({
563 return -EFAULT;
564 })
565 )
566 return 0;
937} 567}
938EXPORT_SYMBOL(iov_iter_get_pages); 568EXPORT_SYMBOL(iov_iter_get_pages);
939 569
570static struct page **get_pages_array(size_t n)
571{
572 struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
573 if (!p)
574 p = vmalloc(n * sizeof(struct page *));
575 return p;
576}
577
940ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, 578ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
941 struct page ***pages, size_t maxsize, 579 struct page ***pages, size_t maxsize,
942 size_t *start) 580 size_t *start)
943{ 581{
944 if (i->type & ITER_BVEC) 582 struct page **p;
945 return get_pages_alloc_bvec(i, pages, maxsize, start); 583
946 else 584 if (maxsize > i->count)
947 return get_pages_alloc_iovec(i, pages, maxsize, start); 585 maxsize = i->count;
586
587 if (!maxsize)
588 return 0;
589
590 iterate_all_kinds(i, maxsize, v, ({
591 unsigned long addr = (unsigned long)v.iov_base;
592 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
593 int n;
594 int res;
595
596 addr &= ~(PAGE_SIZE - 1);
597 n = DIV_ROUND_UP(len, PAGE_SIZE);
598 p = get_pages_array(n);
599 if (!p)
600 return -ENOMEM;
601 res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
602 if (unlikely(res < 0)) {
603 kvfree(p);
604 return res;
605 }
606 *pages = p;
607 return (res == n ? len : res * PAGE_SIZE) - *start;
608 0;}),({
609 /* can't be more than PAGE_SIZE */
610 *start = v.bv_offset;
611 *pages = p = get_pages_array(1);
612 if (!p)
613 return -ENOMEM;
614 get_page(*p = v.bv_page);
615 return v.bv_len;
616 }),({
617 return -EFAULT;
618 })
619 )
620 return 0;
948} 621}
949EXPORT_SYMBOL(iov_iter_get_pages_alloc); 622EXPORT_SYMBOL(iov_iter_get_pages_alloc);
950 623
624size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
625 struct iov_iter *i)
626{
627 char *to = addr;
628 __wsum sum, next;
629 size_t off = 0;
630 if (unlikely(bytes > i->count))
631 bytes = i->count;
632
633 if (unlikely(!bytes))
634 return 0;
635
636 sum = *csum;
637 iterate_and_advance(i, bytes, v, ({
638 int err = 0;
639 next = csum_and_copy_from_user(v.iov_base,
640 (to += v.iov_len) - v.iov_len,
641 v.iov_len, 0, &err);
642 if (!err) {
643 sum = csum_block_add(sum, next, off);
644 off += v.iov_len;
645 }
646 err ? v.iov_len : 0;
647 }), ({
648 char *p = kmap_atomic(v.bv_page);
649 next = csum_partial_copy_nocheck(p + v.bv_offset,
650 (to += v.bv_len) - v.bv_len,
651 v.bv_len, 0);
652 kunmap_atomic(p);
653 sum = csum_block_add(sum, next, off);
654 off += v.bv_len;
655 }),({
656 next = csum_partial_copy_nocheck(v.iov_base,
657 (to += v.iov_len) - v.iov_len,
658 v.iov_len, 0);
659 sum = csum_block_add(sum, next, off);
660 off += v.iov_len;
661 })
662 )
663 *csum = sum;
664 return bytes;
665}
666EXPORT_SYMBOL(csum_and_copy_from_iter);
667
668size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum,
669 struct iov_iter *i)
670{
671 char *from = addr;
672 __wsum sum, next;
673 size_t off = 0;
674 if (unlikely(bytes > i->count))
675 bytes = i->count;
676
677 if (unlikely(!bytes))
678 return 0;
679
680 sum = *csum;
681 iterate_and_advance(i, bytes, v, ({
682 int err = 0;
683 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
684 v.iov_base,
685 v.iov_len, 0, &err);
686 if (!err) {
687 sum = csum_block_add(sum, next, off);
688 off += v.iov_len;
689 }
690 err ? v.iov_len : 0;
691 }), ({
692 char *p = kmap_atomic(v.bv_page);
693 next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
694 p + v.bv_offset,
695 v.bv_len, 0);
696 kunmap_atomic(p);
697 sum = csum_block_add(sum, next, off);
698 off += v.bv_len;
699 }),({
700 next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
701 v.iov_base,
702 v.iov_len, 0);
703 sum = csum_block_add(sum, next, off);
704 off += v.iov_len;
705 })
706 )
707 *csum = sum;
708 return bytes;
709}
710EXPORT_SYMBOL(csum_and_copy_to_iter);
711
951int iov_iter_npages(const struct iov_iter *i, int maxpages) 712int iov_iter_npages(const struct iov_iter *i, int maxpages)
952{ 713{
953 if (i->type & ITER_BVEC) 714 size_t size = i->count;
954 return iov_iter_npages_bvec(i, maxpages); 715 int npages = 0;
955 else 716
956 return iov_iter_npages_iovec(i, maxpages); 717 if (!size)
718 return 0;
719
720 iterate_all_kinds(i, size, v, ({
721 unsigned long p = (unsigned long)v.iov_base;
722 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
723 - p / PAGE_SIZE;
724 if (npages >= maxpages)
725 return maxpages;
726 0;}),({
727 npages++;
728 if (npages >= maxpages)
729 return maxpages;
730 }),({
731 unsigned long p = (unsigned long)v.iov_base;
732 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
733 - p / PAGE_SIZE;
734 if (npages >= maxpages)
735 return maxpages;
736 })
737 )
738 return npages;
957} 739}
958EXPORT_SYMBOL(iov_iter_npages); 740EXPORT_SYMBOL(iov_iter_npages);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d6ac0e33e150..ee48428cf8e3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5064,7 +5064,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
 	 *
 	 * DO NOT ADD NEW FILES.
 	 */
-	name = cfile.file->f_dentry->d_name.name;
+	name = cfile.file->f_path.dentry->d_name.name;
 
 	if (!strcmp(name, "memory.usage_in_bytes")) {
 		event->register_event = mem_cgroup_usage_register_event;
@@ -5088,7 +5088,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
 	 * automatically removed on cgroup destruction but the removal is
 	 * asynchronous, so take an extra ref on @css.
 	 */
-	cfile_css = css_tryget_online_from_dir(cfile.file->f_dentry->d_parent,
+	cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent,
 					       &memory_cgrp_subsys);
 	ret = -EINVAL;
 	if (IS_ERR(cfile_css))