Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 766
1 file changed, 537 insertions(+), 229 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 15c8413ee929..c6049e947cd9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -30,7 +30,7 @@ | |||
30 | #include <linux/security.h> | 30 | #include <linux/security.h> |
31 | #include <linux/syscalls.h> | 31 | #include <linux/syscalls.h> |
32 | #include <linux/cpuset.h> | 32 | #include <linux/cpuset.h> |
33 | #include "filemap.h" | 33 | #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ |
34 | #include "internal.h" | 34 | #include "internal.h" |
35 | 35 | ||
36 | /* | 36 | /* |
@@ -593,7 +593,7 @@ void fastcall __lock_page_nosync(struct page *page) | |||
593 | * Is there a pagecache struct page at the given (mapping, offset) tuple? | 593 | * Is there a pagecache struct page at the given (mapping, offset) tuple? |
594 | * If yes, increment its refcount and return it; if no, return NULL. | 594 | * If yes, increment its refcount and return it; if no, return NULL. |
595 | */ | 595 | */ |
596 | struct page * find_get_page(struct address_space *mapping, unsigned long offset) | 596 | struct page * find_get_page(struct address_space *mapping, pgoff_t offset) |
597 | { | 597 | { |
598 | struct page *page; | 598 | struct page *page; |
599 | 599 | ||
@@ -617,30 +617,31 @@ EXPORT_SYMBOL(find_get_page); | |||
617 | * Returns zero if the page was not present. find_lock_page() may sleep. | 617 | * Returns zero if the page was not present. find_lock_page() may sleep. |
618 | */ | 618 | */ |
619 | struct page *find_lock_page(struct address_space *mapping, | 619 | struct page *find_lock_page(struct address_space *mapping, |
620 | unsigned long offset) | 620 | pgoff_t offset) |
621 | { | 621 | { |
622 | struct page *page; | 622 | struct page *page; |
623 | 623 | ||
624 | read_lock_irq(&mapping->tree_lock); | ||
625 | repeat: | 624 | repeat: |
625 | read_lock_irq(&mapping->tree_lock); | ||
626 | page = radix_tree_lookup(&mapping->page_tree, offset); | 626 | page = radix_tree_lookup(&mapping->page_tree, offset); |
627 | if (page) { | 627 | if (page) { |
628 | page_cache_get(page); | 628 | page_cache_get(page); |
629 | if (TestSetPageLocked(page)) { | 629 | if (TestSetPageLocked(page)) { |
630 | read_unlock_irq(&mapping->tree_lock); | 630 | read_unlock_irq(&mapping->tree_lock); |
631 | __lock_page(page); | 631 | __lock_page(page); |
632 | read_lock_irq(&mapping->tree_lock); | ||
633 | 632 | ||
634 | /* Has the page been truncated while we slept? */ | 633 | /* Has the page been truncated while we slept? */ |
635 | if (unlikely(page->mapping != mapping || | 634 | if (unlikely(page->mapping != mapping)) { |
636 | page->index != offset)) { | ||
637 | unlock_page(page); | 635 | unlock_page(page); |
638 | page_cache_release(page); | 636 | page_cache_release(page); |
639 | goto repeat; | 637 | goto repeat; |
640 | } | 638 | } |
639 | VM_BUG_ON(page->index != offset); | ||
640 | goto out; | ||
641 | } | 641 | } |
642 | } | 642 | } |
643 | read_unlock_irq(&mapping->tree_lock); | 643 | read_unlock_irq(&mapping->tree_lock); |
644 | out: | ||
644 | return page; | 645 | return page; |
645 | } | 646 | } |
646 | EXPORT_SYMBOL(find_lock_page); | 647 | EXPORT_SYMBOL(find_lock_page); |
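find_lock_page() still returns either NULL or a locked page with an elevated refcount, so callers are unaffected by this hunk; only the retry loop and the truncation check were reworked. A minimal, hypothetical caller (the helper name and error handling are illustrative, not part of the patch) could look like:

	static int touch_cached_page(struct address_space *mapping, pgoff_t index)
	{
		struct page *page = find_lock_page(mapping, index);

		if (!page)
			return -ENOENT;	/* not in pagecache; caller may allocate */
		/* ... work on the locked, referenced page here ... */
		unlock_page(page);
		page_cache_release(page);
		return 0;
	}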
@@ -663,29 +664,24 @@ EXPORT_SYMBOL(find_lock_page); | |||
663 | * memory exhaustion. | 664 | * memory exhaustion. |
664 | */ | 665 | */ |
665 | struct page *find_or_create_page(struct address_space *mapping, | 666 | struct page *find_or_create_page(struct address_space *mapping, |
666 | unsigned long index, gfp_t gfp_mask) | 667 | pgoff_t index, gfp_t gfp_mask) |
667 | { | 668 | { |
668 | struct page *page, *cached_page = NULL; | 669 | struct page *page; |
669 | int err; | 670 | int err; |
670 | repeat: | 671 | repeat: |
671 | page = find_lock_page(mapping, index); | 672 | page = find_lock_page(mapping, index); |
672 | if (!page) { | 673 | if (!page) { |
673 | if (!cached_page) { | 674 | page = __page_cache_alloc(gfp_mask); |
674 | cached_page = | 675 | if (!page) |
675 | __page_cache_alloc(gfp_mask); | 676 | return NULL; |
676 | if (!cached_page) | 677 | err = add_to_page_cache_lru(page, mapping, index, gfp_mask); |
677 | return NULL; | 678 | if (unlikely(err)) { |
679 | page_cache_release(page); | ||
680 | page = NULL; | ||
681 | if (err == -EEXIST) | ||
682 | goto repeat; | ||
678 | } | 683 | } |
679 | err = add_to_page_cache_lru(cached_page, mapping, | ||
680 | index, gfp_mask); | ||
681 | if (!err) { | ||
682 | page = cached_page; | ||
683 | cached_page = NULL; | ||
684 | } else if (err == -EEXIST) | ||
685 | goto repeat; | ||
686 | } | 684 | } |
687 | if (cached_page) | ||
688 | page_cache_release(cached_page); | ||
689 | return page; | 685 | return page; |
690 | } | 686 | } |
691 | EXPORT_SYMBOL(find_or_create_page); | 687 | EXPORT_SYMBOL(find_or_create_page); |
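With the cached_page bouncing removed, find_or_create_page() now simply allocates a fresh page on a miss, drops it if add_to_page_cache_lru() fails, and retries only on -EEXIST. The contract for callers is unchanged: a locked, referenced page on success, NULL on allocation failure. As a hedged illustration (helper name and gfp choice are hypothetical, not from the patch):

	static int dirty_one_page(struct address_space *mapping, pgoff_t index)
	{
		struct page *page;

		page = find_or_create_page(mapping, index,
					   mapping_gfp_mask(mapping));
		if (!page)
			return -ENOMEM;
		/* page is locked and referenced at this point */
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		return 0;
	}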
@@ -797,7 +793,7 @@ EXPORT_SYMBOL(find_get_pages_tag); | |||
797 | * and deadlock against the caller's locked page. | 793 | * and deadlock against the caller's locked page. |
798 | */ | 794 | */ |
799 | struct page * | 795 | struct page * |
800 | grab_cache_page_nowait(struct address_space *mapping, unsigned long index) | 796 | grab_cache_page_nowait(struct address_space *mapping, pgoff_t index) |
801 | { | 797 | { |
802 | struct page *page = find_get_page(mapping, index); | 798 | struct page *page = find_get_page(mapping, index); |
803 | 799 | ||
@@ -859,34 +855,29 @@ static void shrink_readahead_size_eio(struct file *filp, | |||
859 | * It may be NULL. | 855 | * It may be NULL. |
860 | */ | 856 | */ |
861 | void do_generic_mapping_read(struct address_space *mapping, | 857 | void do_generic_mapping_read(struct address_space *mapping, |
862 | struct file_ra_state *_ra, | 858 | struct file_ra_state *ra, |
863 | struct file *filp, | 859 | struct file *filp, |
864 | loff_t *ppos, | 860 | loff_t *ppos, |
865 | read_descriptor_t *desc, | 861 | read_descriptor_t *desc, |
866 | read_actor_t actor) | 862 | read_actor_t actor) |
867 | { | 863 | { |
868 | struct inode *inode = mapping->host; | 864 | struct inode *inode = mapping->host; |
869 | unsigned long index; | 865 | pgoff_t index; |
870 | unsigned long offset; | 866 | pgoff_t last_index; |
871 | unsigned long last_index; | 867 | pgoff_t prev_index; |
872 | unsigned long next_index; | 868 | unsigned long offset; /* offset into pagecache page */ |
873 | unsigned long prev_index; | ||
874 | unsigned int prev_offset; | 869 | unsigned int prev_offset; |
875 | struct page *cached_page; | ||
876 | int error; | 870 | int error; |
877 | struct file_ra_state ra = *_ra; | ||
878 | 871 | ||
879 | cached_page = NULL; | ||
880 | index = *ppos >> PAGE_CACHE_SHIFT; | 872 | index = *ppos >> PAGE_CACHE_SHIFT; |
881 | next_index = index; | 873 | prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; |
882 | prev_index = ra.prev_index; | 874 | prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); |
883 | prev_offset = ra.prev_offset; | ||
884 | last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; | 875 | last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; |
885 | offset = *ppos & ~PAGE_CACHE_MASK; | 876 | offset = *ppos & ~PAGE_CACHE_MASK; |
886 | 877 | ||
887 | for (;;) { | 878 | for (;;) { |
888 | struct page *page; | 879 | struct page *page; |
889 | unsigned long end_index; | 880 | pgoff_t end_index; |
890 | loff_t isize; | 881 | loff_t isize; |
891 | unsigned long nr, ret; | 882 | unsigned long nr, ret; |
892 | 883 | ||
@@ -895,7 +886,7 @@ find_page: | |||
895 | page = find_get_page(mapping, index); | 886 | page = find_get_page(mapping, index); |
896 | if (!page) { | 887 | if (!page) { |
897 | page_cache_sync_readahead(mapping, | 888 | page_cache_sync_readahead(mapping, |
898 | &ra, filp, | 889 | ra, filp, |
899 | index, last_index - index); | 890 | index, last_index - index); |
900 | page = find_get_page(mapping, index); | 891 | page = find_get_page(mapping, index); |
901 | if (unlikely(page == NULL)) | 892 | if (unlikely(page == NULL)) |
@@ -903,7 +894,7 @@ find_page: | |||
903 | } | 894 | } |
904 | if (PageReadahead(page)) { | 895 | if (PageReadahead(page)) { |
905 | page_cache_async_readahead(mapping, | 896 | page_cache_async_readahead(mapping, |
906 | &ra, filp, page, | 897 | ra, filp, page, |
907 | index, last_index - index); | 898 | index, last_index - index); |
908 | } | 899 | } |
909 | if (!PageUptodate(page)) | 900 | if (!PageUptodate(page)) |
@@ -966,7 +957,6 @@ page_ok: | |||
966 | index += offset >> PAGE_CACHE_SHIFT; | 957 | index += offset >> PAGE_CACHE_SHIFT; |
967 | offset &= ~PAGE_CACHE_MASK; | 958 | offset &= ~PAGE_CACHE_MASK; |
968 | prev_offset = offset; | 959 | prev_offset = offset; |
969 | ra.prev_offset = offset; | ||
970 | 960 | ||
971 | page_cache_release(page); | 961 | page_cache_release(page); |
972 | if (ret == nr && desc->count) | 962 | if (ret == nr && desc->count) |
@@ -1015,7 +1005,7 @@ readpage: | |||
1015 | } | 1005 | } |
1016 | unlock_page(page); | 1006 | unlock_page(page); |
1017 | error = -EIO; | 1007 | error = -EIO; |
1018 | shrink_readahead_size_eio(filp, &ra); | 1008 | shrink_readahead_size_eio(filp, ra); |
1019 | goto readpage_error; | 1009 | goto readpage_error; |
1020 | } | 1010 | } |
1021 | unlock_page(page); | 1011 | unlock_page(page); |
@@ -1034,33 +1024,29 @@ no_cached_page: | |||
1034 | * Ok, it wasn't cached, so we need to create a new | 1024 | * Ok, it wasn't cached, so we need to create a new |
1035 | * page.. | 1025 | * page.. |
1036 | */ | 1026 | */ |
1037 | if (!cached_page) { | 1027 | page = page_cache_alloc_cold(mapping); |
1038 | cached_page = page_cache_alloc_cold(mapping); | 1028 | if (!page) { |
1039 | if (!cached_page) { | 1029 | desc->error = -ENOMEM; |
1040 | desc->error = -ENOMEM; | 1030 | goto out; |
1041 | goto out; | ||
1042 | } | ||
1043 | } | 1031 | } |
1044 | error = add_to_page_cache_lru(cached_page, mapping, | 1032 | error = add_to_page_cache_lru(page, mapping, |
1045 | index, GFP_KERNEL); | 1033 | index, GFP_KERNEL); |
1046 | if (error) { | 1034 | if (error) { |
1035 | page_cache_release(page); | ||
1047 | if (error == -EEXIST) | 1036 | if (error == -EEXIST) |
1048 | goto find_page; | 1037 | goto find_page; |
1049 | desc->error = error; | 1038 | desc->error = error; |
1050 | goto out; | 1039 | goto out; |
1051 | } | 1040 | } |
1052 | page = cached_page; | ||
1053 | cached_page = NULL; | ||
1054 | goto readpage; | 1041 | goto readpage; |
1055 | } | 1042 | } |
1056 | 1043 | ||
1057 | out: | 1044 | out: |
1058 | *_ra = ra; | 1045 | ra->prev_pos = prev_index; |
1059 | _ra->prev_index = prev_index; | 1046 | ra->prev_pos <<= PAGE_CACHE_SHIFT; |
1047 | ra->prev_pos |= prev_offset; | ||
1060 | 1048 | ||
1061 | *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; | 1049 | *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; |
1062 | if (cached_page) | ||
1063 | page_cache_release(cached_page); | ||
1064 | if (filp) | 1050 | if (filp) |
1065 | file_accessed(filp); | 1051 | file_accessed(filp); |
1066 | } | 1052 | } |
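The readahead bookkeeping above replaces the old prev_index/prev_offset pair with a single byte position, ra->prev_pos, rebuilt at the out: label by shifting the page index up by PAGE_CACHE_SHIFT and OR-ing in the in-page offset. A small worked sketch of the packing (illustrative only; assumes 4K pages, i.e. PAGE_CACHE_SHIFT == 12):

	/* pack: index 3, offset 0x123  ->  prev_pos = (3 << 12) | 0x123 = 0x3123 */
	static inline loff_t pack_prev_pos(pgoff_t index, unsigned int offset)
	{
		return ((loff_t)index << PAGE_CACHE_SHIFT) | offset;
	}

	/* unpack, as done at the top of do_generic_mapping_read(): */
	prev_index  = prev_pos >> PAGE_CACHE_SHIFT;	/* == 3     */
	prev_offset = prev_pos & (PAGE_CACHE_SIZE - 1);	/* == 0x123 */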
@@ -1220,7 +1206,7 @@ EXPORT_SYMBOL(generic_file_aio_read); | |||
1220 | 1206 | ||
1221 | static ssize_t | 1207 | static ssize_t |
1222 | do_readahead(struct address_space *mapping, struct file *filp, | 1208 | do_readahead(struct address_space *mapping, struct file *filp, |
1223 | unsigned long index, unsigned long nr) | 1209 | pgoff_t index, unsigned long nr) |
1224 | { | 1210 | { |
1225 | if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage) | 1211 | if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage) |
1226 | return -EINVAL; | 1212 | return -EINVAL; |
@@ -1240,8 +1226,8 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count) | |||
1240 | if (file) { | 1226 | if (file) { |
1241 | if (file->f_mode & FMODE_READ) { | 1227 | if (file->f_mode & FMODE_READ) { |
1242 | struct address_space *mapping = file->f_mapping; | 1228 | struct address_space *mapping = file->f_mapping; |
1243 | unsigned long start = offset >> PAGE_CACHE_SHIFT; | 1229 | pgoff_t start = offset >> PAGE_CACHE_SHIFT; |
1244 | unsigned long end = (offset + count - 1) >> PAGE_CACHE_SHIFT; | 1230 | pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT; |
1245 | unsigned long len = end - start + 1; | 1231 | unsigned long len = end - start + 1; |
1246 | ret = do_readahead(mapping, file, start, len); | 1232 | ret = do_readahead(mapping, file, start, len); |
1247 | } | 1233 | } |
@@ -1251,7 +1237,6 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count) | |||
1251 | } | 1237 | } |
1252 | 1238 | ||
1253 | #ifdef CONFIG_MMU | 1239 | #ifdef CONFIG_MMU |
1254 | static int FASTCALL(page_cache_read(struct file * file, unsigned long offset)); | ||
1255 | /** | 1240 | /** |
1256 | * page_cache_read - adds requested page to the page cache if not already there | 1241 | * page_cache_read - adds requested page to the page cache if not already there |
1257 | * @file: file to read | 1242 | * @file: file to read |
@@ -1260,7 +1245,7 @@ static int FASTCALL(page_cache_read(struct file * file, unsigned long offset)); | |||
1260 | * This adds the requested page to the page cache if it isn't already there, | 1245 | * This adds the requested page to the page cache if it isn't already there, |
1261 | * and schedules an I/O to read in its contents from disk. | 1246 | * and schedules an I/O to read in its contents from disk. |
1262 | */ | 1247 | */ |
1263 | static int fastcall page_cache_read(struct file * file, unsigned long offset) | 1248 | static int fastcall page_cache_read(struct file * file, pgoff_t offset) |
1264 | { | 1249 | { |
1265 | struct address_space *mapping = file->f_mapping; | 1250 | struct address_space *mapping = file->f_mapping; |
1266 | struct page *page; | 1251 | struct page *page; |
@@ -1349,7 +1334,7 @@ retry_find: | |||
1349 | * Do we miss much more than hit in this file? If so, | 1334 | * Do we miss much more than hit in this file? If so, |
1350 | * stop bothering with read-ahead. It will only hurt. | 1335 | * stop bothering with read-ahead. It will only hurt. |
1351 | */ | 1336 | */ |
1352 | if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS) | 1337 | if (ra->mmap_miss > MMAP_LOTSAMISS) |
1353 | goto no_cached_page; | 1338 | goto no_cached_page; |
1354 | 1339 | ||
1355 | /* | 1340 | /* |
@@ -1375,7 +1360,7 @@ retry_find: | |||
1375 | } | 1360 | } |
1376 | 1361 | ||
1377 | if (!did_readaround) | 1362 | if (!did_readaround) |
1378 | ra->mmap_hit++; | 1363 | ra->mmap_miss--; |
1379 | 1364 | ||
1380 | /* | 1365 | /* |
1381 | * We have a locked page in the page cache, now we need to check | 1366 | * We have a locked page in the page cache, now we need to check |
@@ -1396,7 +1381,7 @@ retry_find: | |||
1396 | * Found the page and have a reference on it. | 1381 | * Found the page and have a reference on it. |
1397 | */ | 1382 | */ |
1398 | mark_page_accessed(page); | 1383 | mark_page_accessed(page); |
1399 | ra->prev_index = page->index; | 1384 | ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT; |
1400 | vmf->page = page; | 1385 | vmf->page = page; |
1401 | return ret | VM_FAULT_LOCKED; | 1386 | return ret | VM_FAULT_LOCKED; |
1402 | 1387 | ||
@@ -1501,39 +1486,32 @@ EXPORT_SYMBOL(generic_file_mmap); | |||
1501 | EXPORT_SYMBOL(generic_file_readonly_mmap); | 1486 | EXPORT_SYMBOL(generic_file_readonly_mmap); |
1502 | 1487 | ||
1503 | static struct page *__read_cache_page(struct address_space *mapping, | 1488 | static struct page *__read_cache_page(struct address_space *mapping, |
1504 | unsigned long index, | 1489 | pgoff_t index, |
1505 | int (*filler)(void *,struct page*), | 1490 | int (*filler)(void *,struct page*), |
1506 | void *data) | 1491 | void *data) |
1507 | { | 1492 | { |
1508 | struct page *page, *cached_page = NULL; | 1493 | struct page *page; |
1509 | int err; | 1494 | int err; |
1510 | repeat: | 1495 | repeat: |
1511 | page = find_get_page(mapping, index); | 1496 | page = find_get_page(mapping, index); |
1512 | if (!page) { | 1497 | if (!page) { |
1513 | if (!cached_page) { | 1498 | page = page_cache_alloc_cold(mapping); |
1514 | cached_page = page_cache_alloc_cold(mapping); | 1499 | if (!page) |
1515 | if (!cached_page) | 1500 | return ERR_PTR(-ENOMEM); |
1516 | return ERR_PTR(-ENOMEM); | 1501 | err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); |
1517 | } | 1502 | if (unlikely(err)) { |
1518 | err = add_to_page_cache_lru(cached_page, mapping, | 1503 | page_cache_release(page); |
1519 | index, GFP_KERNEL); | 1504 | if (err == -EEXIST) |
1520 | if (err == -EEXIST) | 1505 | goto repeat; |
1521 | goto repeat; | ||
1522 | if (err < 0) { | ||
1523 | /* Presumably ENOMEM for radix tree node */ | 1506 | /* Presumably ENOMEM for radix tree node */ |
1524 | page_cache_release(cached_page); | ||
1525 | return ERR_PTR(err); | 1507 | return ERR_PTR(err); |
1526 | } | 1508 | } |
1527 | page = cached_page; | ||
1528 | cached_page = NULL; | ||
1529 | err = filler(data, page); | 1509 | err = filler(data, page); |
1530 | if (err < 0) { | 1510 | if (err < 0) { |
1531 | page_cache_release(page); | 1511 | page_cache_release(page); |
1532 | page = ERR_PTR(err); | 1512 | page = ERR_PTR(err); |
1533 | } | 1513 | } |
1534 | } | 1514 | } |
1535 | if (cached_page) | ||
1536 | page_cache_release(cached_page); | ||
1537 | return page; | 1515 | return page; |
1538 | } | 1516 | } |
1539 | 1517 | ||
@@ -1542,7 +1520,7 @@ repeat: | |||
1542 | * after submitting it to the filler. | 1520 | * after submitting it to the filler. |
1543 | */ | 1521 | */ |
1544 | struct page *read_cache_page_async(struct address_space *mapping, | 1522 | struct page *read_cache_page_async(struct address_space *mapping, |
1545 | unsigned long index, | 1523 | pgoff_t index, |
1546 | int (*filler)(void *,struct page*), | 1524 | int (*filler)(void *,struct page*), |
1547 | void *data) | 1525 | void *data) |
1548 | { | 1526 | { |
@@ -1590,7 +1568,7 @@ EXPORT_SYMBOL(read_cache_page_async); | |||
1590 | * If the page does not get brought uptodate, return -EIO. | 1568 | * If the page does not get brought uptodate, return -EIO. |
1591 | */ | 1569 | */ |
1592 | struct page *read_cache_page(struct address_space *mapping, | 1570 | struct page *read_cache_page(struct address_space *mapping, |
1593 | unsigned long index, | 1571 | pgoff_t index, |
1594 | int (*filler)(void *,struct page*), | 1572 | int (*filler)(void *,struct page*), |
1595 | void *data) | 1573 | void *data) |
1596 | { | 1574 | { |
@@ -1610,40 +1588,6 @@ struct page *read_cache_page(struct address_space *mapping, | |||
1610 | EXPORT_SYMBOL(read_cache_page); | 1588 | EXPORT_SYMBOL(read_cache_page); |
1611 | 1589 | ||
1612 | /* | 1590 | /* |
1613 | * If the page was newly created, increment its refcount and add it to the | ||
1614 | * caller's lru-buffering pagevec. This function is specifically for | ||
1615 | * generic_file_write(). | ||
1616 | */ | ||
1617 | static inline struct page * | ||
1618 | __grab_cache_page(struct address_space *mapping, unsigned long index, | ||
1619 | struct page **cached_page, struct pagevec *lru_pvec) | ||
1620 | { | ||
1621 | int err; | ||
1622 | struct page *page; | ||
1623 | repeat: | ||
1624 | page = find_lock_page(mapping, index); | ||
1625 | if (!page) { | ||
1626 | if (!*cached_page) { | ||
1627 | *cached_page = page_cache_alloc(mapping); | ||
1628 | if (!*cached_page) | ||
1629 | return NULL; | ||
1630 | } | ||
1631 | err = add_to_page_cache(*cached_page, mapping, | ||
1632 | index, GFP_KERNEL); | ||
1633 | if (err == -EEXIST) | ||
1634 | goto repeat; | ||
1635 | if (err == 0) { | ||
1636 | page = *cached_page; | ||
1637 | page_cache_get(page); | ||
1638 | if (!pagevec_add(lru_pvec, page)) | ||
1639 | __pagevec_lru_add(lru_pvec); | ||
1640 | *cached_page = NULL; | ||
1641 | } | ||
1642 | } | ||
1643 | return page; | ||
1644 | } | ||
1645 | |||
1646 | /* | ||
1647 | * The logic we want is | 1591 | * The logic we want is |
1648 | * | 1592 | * |
1649 | * if suid or (sgid and xgrp) | 1593 | * if suid or (sgid and xgrp) |
@@ -1691,8 +1635,7 @@ int remove_suid(struct dentry *dentry) | |||
1691 | } | 1635 | } |
1692 | EXPORT_SYMBOL(remove_suid); | 1636 | EXPORT_SYMBOL(remove_suid); |
1693 | 1637 | ||
1694 | size_t | 1638 | static size_t __iovec_copy_from_user_inatomic(char *vaddr, |
1695 | __filemap_copy_from_user_iovec_inatomic(char *vaddr, | ||
1696 | const struct iovec *iov, size_t base, size_t bytes) | 1639 | const struct iovec *iov, size_t base, size_t bytes) |
1697 | { | 1640 | { |
1698 | size_t copied = 0, left = 0; | 1641 | size_t copied = 0, left = 0; |
@@ -1715,6 +1658,124 @@ __filemap_copy_from_user_iovec_inatomic(char *vaddr, | |||
1715 | } | 1658 | } |
1716 | 1659 | ||
1717 | /* | 1660 | /* |
1661 | * Copy as much as we can into the page and return the number of bytes which | ||
1662 | * were successfully copied. If a fault is encountered then return the number of | ||
1663 | * bytes which were copied. | ||
1664 | */ | ||
1665 | size_t iov_iter_copy_from_user_atomic(struct page *page, | ||
1666 | struct iov_iter *i, unsigned long offset, size_t bytes) | ||
1667 | { | ||
1668 | char *kaddr; | ||
1669 | size_t copied; | ||
1670 | |||
1671 | BUG_ON(!in_atomic()); | ||
1672 | kaddr = kmap_atomic(page, KM_USER0); | ||
1673 | if (likely(i->nr_segs == 1)) { | ||
1674 | int left; | ||
1675 | char __user *buf = i->iov->iov_base + i->iov_offset; | ||
1676 | left = __copy_from_user_inatomic_nocache(kaddr + offset, | ||
1677 | buf, bytes); | ||
1678 | copied = bytes - left; | ||
1679 | } else { | ||
1680 | copied = __iovec_copy_from_user_inatomic(kaddr + offset, | ||
1681 | i->iov, i->iov_offset, bytes); | ||
1682 | } | ||
1683 | kunmap_atomic(kaddr, KM_USER0); | ||
1684 | |||
1685 | return copied; | ||
1686 | } | ||
1687 | EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); | ||
1688 | |||
1689 | /* | ||
1690 | * This has the same side effects and return value as | ||
1691 | * iov_iter_copy_from_user_atomic(). | ||
1692 | * The difference is that it attempts to resolve faults. | ||
1693 | * Page must not be locked. | ||
1694 | */ | ||
1695 | size_t iov_iter_copy_from_user(struct page *page, | ||
1696 | struct iov_iter *i, unsigned long offset, size_t bytes) | ||
1697 | { | ||
1698 | char *kaddr; | ||
1699 | size_t copied; | ||
1700 | |||
1701 | kaddr = kmap(page); | ||
1702 | if (likely(i->nr_segs == 1)) { | ||
1703 | int left; | ||
1704 | char __user *buf = i->iov->iov_base + i->iov_offset; | ||
1705 | left = __copy_from_user_nocache(kaddr + offset, buf, bytes); | ||
1706 | copied = bytes - left; | ||
1707 | } else { | ||
1708 | copied = __iovec_copy_from_user_inatomic(kaddr + offset, | ||
1709 | i->iov, i->iov_offset, bytes); | ||
1710 | } | ||
1711 | kunmap(page); | ||
1712 | return copied; | ||
1713 | } | ||
1714 | EXPORT_SYMBOL(iov_iter_copy_from_user); | ||
1715 | |||
1716 | static void __iov_iter_advance_iov(struct iov_iter *i, size_t bytes) | ||
1717 | { | ||
1718 | if (likely(i->nr_segs == 1)) { | ||
1719 | i->iov_offset += bytes; | ||
1720 | } else { | ||
1721 | const struct iovec *iov = i->iov; | ||
1722 | size_t base = i->iov_offset; | ||
1723 | |||
1724 | while (bytes) { | ||
1725 | int copy = min(bytes, iov->iov_len - base); | ||
1726 | |||
1727 | bytes -= copy; | ||
1728 | base += copy; | ||
1729 | if (iov->iov_len == base) { | ||
1730 | iov++; | ||
1731 | base = 0; | ||
1732 | } | ||
1733 | } | ||
1734 | i->iov = iov; | ||
1735 | i->iov_offset = base; | ||
1736 | } | ||
1737 | } | ||
1738 | |||
1739 | void iov_iter_advance(struct iov_iter *i, size_t bytes) | ||
1740 | { | ||
1741 | BUG_ON(i->count < bytes); | ||
1742 | |||
1743 | __iov_iter_advance_iov(i, bytes); | ||
1744 | i->count -= bytes; | ||
1745 | } | ||
1746 | EXPORT_SYMBOL(iov_iter_advance); | ||
1747 | |||
1748 | /* | ||
1749 | * Fault in the first iovec of the given iov_iter, to a maximum length | ||
1750 | * of bytes. Returns 0 on success, or non-zero if the memory could not be | ||
1751 | * accessed (ie. because it is an invalid address). | ||
1752 | * | ||
1753 | * writev-intensive code may want this to prefault several iovecs -- that | ||
1754 | * would be possible (callers must not rely on the fact that _only_ the | ||
1755 | * first iovec will be faulted with the current implementation). | ||
1756 | */ | ||
1757 | int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) | ||
1758 | { | ||
1759 | char __user *buf = i->iov->iov_base + i->iov_offset; | ||
1760 | bytes = min(bytes, i->iov->iov_len - i->iov_offset); | ||
1761 | return fault_in_pages_readable(buf, bytes); | ||
1762 | } | ||
1763 | EXPORT_SYMBOL(iov_iter_fault_in_readable); | ||
1764 | |||
1765 | /* | ||
1766 | * Return the count of just the current iov_iter segment. | ||
1767 | */ | ||
1768 | size_t iov_iter_single_seg_count(struct iov_iter *i) | ||
1769 | { | ||
1770 | const struct iovec *iov = i->iov; | ||
1771 | if (i->nr_segs == 1) | ||
1772 | return i->count; | ||
1773 | else | ||
1774 | return min(i->count, iov->iov_len - i->iov_offset); | ||
1775 | } | ||
1776 | EXPORT_SYMBOL(iov_iter_single_seg_count); | ||
1777 | |||
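Taken together, these helpers implement a prefault-then-copy-atomically pattern: fault the user source in while no page locks are held, copy with pagefaults disabled so the fault handler cannot be re-entered against a locked pagecache page, and then advance the iterator by however much was actually copied. A condensed, hypothetical sketch of that pattern (the real version, wrapped in write_begin/write_end, is generic_perform_write() further down):

	static ssize_t copy_iter_to_page(struct page *page, struct iov_iter *i,
					 loff_t pos)
	{
		unsigned long offset = pos & (PAGE_CACHE_SIZE - 1);
		size_t bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
				     iov_iter_count(i));
		size_t copied;

		/* Prefault the source (in the real path this happens before
		 * the destination page is locked). */
		if (iov_iter_fault_in_readable(i, bytes))
			return -EFAULT;

		pagefault_disable();
		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
		pagefault_enable();

		iov_iter_advance(i, copied);
		return copied;
	}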
1778 | /* | ||
1718 | * Performs necessary checks before doing a write | 1779 | * Performs necessary checks before doing a write |
1719 | * | 1780 | * |
1720 | * Can adjust writing position or amount of bytes to write. | 1781 | * Can adjust writing position or amount of bytes to write. |
@@ -1796,6 +1857,91 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i | |||
1796 | } | 1857 | } |
1797 | EXPORT_SYMBOL(generic_write_checks); | 1858 | EXPORT_SYMBOL(generic_write_checks); |
1798 | 1859 | ||
1860 | int pagecache_write_begin(struct file *file, struct address_space *mapping, | ||
1861 | loff_t pos, unsigned len, unsigned flags, | ||
1862 | struct page **pagep, void **fsdata) | ||
1863 | { | ||
1864 | const struct address_space_operations *aops = mapping->a_ops; | ||
1865 | |||
1866 | if (aops->write_begin) { | ||
1867 | return aops->write_begin(file, mapping, pos, len, flags, | ||
1868 | pagep, fsdata); | ||
1869 | } else { | ||
1870 | int ret; | ||
1871 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
1872 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); | ||
1873 | struct inode *inode = mapping->host; | ||
1874 | struct page *page; | ||
1875 | again: | ||
1876 | page = __grab_cache_page(mapping, index); | ||
1877 | *pagep = page; | ||
1878 | if (!page) | ||
1879 | return -ENOMEM; | ||
1880 | |||
1881 | if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) { | ||
1882 | /* | ||
1883 | * There is no way to resolve a short write situation | ||
1884 | * for a !Uptodate page (except by double copying in | ||
1885 | * the caller done by generic_perform_write_2copy). | ||
1886 | * | ||
1887 | * Instead, we have to bring it uptodate here. | ||
1888 | */ | ||
1889 | ret = aops->readpage(file, page); | ||
1890 | page_cache_release(page); | ||
1891 | if (ret) { | ||
1892 | if (ret == AOP_TRUNCATED_PAGE) | ||
1893 | goto again; | ||
1894 | return ret; | ||
1895 | } | ||
1896 | goto again; | ||
1897 | } | ||
1898 | |||
1899 | ret = aops->prepare_write(file, page, offset, offset+len); | ||
1900 | if (ret) { | ||
1901 | unlock_page(page); | ||
1902 | page_cache_release(page); | ||
1903 | if (pos + len > inode->i_size) | ||
1904 | vmtruncate(inode, inode->i_size); | ||
1905 | } | ||
1906 | return ret; | ||
1907 | } | ||
1908 | } | ||
1909 | EXPORT_SYMBOL(pagecache_write_begin); | ||
1910 | |||
1911 | int pagecache_write_end(struct file *file, struct address_space *mapping, | ||
1912 | loff_t pos, unsigned len, unsigned copied, | ||
1913 | struct page *page, void *fsdata) | ||
1914 | { | ||
1915 | const struct address_space_operations *aops = mapping->a_ops; | ||
1916 | int ret; | ||
1917 | |||
1918 | if (aops->write_end) { | ||
1919 | mark_page_accessed(page); | ||
1920 | ret = aops->write_end(file, mapping, pos, len, copied, | ||
1921 | page, fsdata); | ||
1922 | } else { | ||
1923 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); | ||
1924 | struct inode *inode = mapping->host; | ||
1925 | |||
1926 | flush_dcache_page(page); | ||
1927 | ret = aops->commit_write(file, page, offset, offset+len); | ||
1928 | unlock_page(page); | ||
1929 | mark_page_accessed(page); | ||
1930 | page_cache_release(page); | ||
1931 | |||
1932 | if (ret < 0) { | ||
1933 | if (pos + len > inode->i_size) | ||
1934 | vmtruncate(inode, inode->i_size); | ||
1935 | } else if (ret > 0) | ||
1936 | ret = min_t(size_t, copied, ret); | ||
1937 | else | ||
1938 | ret = copied; | ||
1939 | } | ||
1940 | |||
1941 | return ret; | ||
1942 | } | ||
1943 | EXPORT_SYMBOL(pagecache_write_end); | ||
1944 | |||
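These wrappers give in-kernel writers a single calling convention that works whether the filesystem implements the new ->write_begin/->write_end pair or only the legacy ->prepare_write/->commit_write. A hypothetical caller copying a kernel buffer into the pagecache, assuming the write fits within one page (the helper name, flag choice and error handling are illustrative, not taken from the patch):

	static int write_kernel_buf(struct file *file, loff_t pos,
				    const char *buf, unsigned len)
	{
		struct address_space *mapping = file->f_mapping;
		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
		struct page *page;
		void *fsdata;
		char *kaddr;
		int ret;

		ret = pagecache_write_begin(file, mapping, pos, len,
					    AOP_FLAG_UNINTERRUPTIBLE,
					    &page, &fsdata);
		if (ret)
			return ret;

		kaddr = kmap_atomic(page, KM_USER0);
		memcpy(kaddr + offset, buf, len);
		kunmap_atomic(kaddr, KM_USER0);
		flush_dcache_page(page);

		ret = pagecache_write_end(file, mapping, pos, len, len,
					  page, fsdata);
		return ret < 0 ? ret : 0;
	}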
1799 | ssize_t | 1945 | ssize_t |
1800 | generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | 1946 | generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, |
1801 | unsigned long *nr_segs, loff_t pos, loff_t *ppos, | 1947 | unsigned long *nr_segs, loff_t pos, loff_t *ppos, |
@@ -1835,151 +1981,314 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
1835 | } | 1981 | } |
1836 | EXPORT_SYMBOL(generic_file_direct_write); | 1982 | EXPORT_SYMBOL(generic_file_direct_write); |
1837 | 1983 | ||
1838 | ssize_t | 1984 | /* |
1839 | generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | 1985 | * Find or create a page at the given pagecache position. Return the locked |
1840 | unsigned long nr_segs, loff_t pos, loff_t *ppos, | 1986 | * page. This function is specifically for buffered writes. |
1841 | size_t count, ssize_t written) | 1987 | */ |
1988 | struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index) | ||
1842 | { | 1989 | { |
1843 | struct file *file = iocb->ki_filp; | 1990 | int status; |
1844 | struct address_space * mapping = file->f_mapping; | 1991 | struct page *page; |
1845 | const struct address_space_operations *a_ops = mapping->a_ops; | 1992 | repeat: |
1846 | struct inode *inode = mapping->host; | 1993 | page = find_lock_page(mapping, index); |
1847 | long status = 0; | 1994 | if (likely(page)) |
1848 | struct page *page; | 1995 | return page; |
1849 | struct page *cached_page = NULL; | ||
1850 | size_t bytes; | ||
1851 | struct pagevec lru_pvec; | ||
1852 | const struct iovec *cur_iov = iov; /* current iovec */ | ||
1853 | size_t iov_base = 0; /* offset in the current iovec */ | ||
1854 | char __user *buf; | ||
1855 | |||
1856 | pagevec_init(&lru_pvec, 0); | ||
1857 | 1996 | ||
1858 | /* | 1997 | page = page_cache_alloc(mapping); |
1859 | * handle partial DIO write. Adjust cur_iov if needed. | 1998 | if (!page) |
1860 | */ | 1999 | return NULL; |
1861 | if (likely(nr_segs == 1)) | 2000 | status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); |
1862 | buf = iov->iov_base + written; | 2001 | if (unlikely(status)) { |
1863 | else { | 2002 | page_cache_release(page); |
1864 | filemap_set_next_iovec(&cur_iov, &iov_base, written); | 2003 | if (status == -EEXIST) |
1865 | buf = cur_iov->iov_base + iov_base; | 2004 | goto repeat; |
2005 | return NULL; | ||
1866 | } | 2006 | } |
2007 | return page; | ||
2008 | } | ||
2009 | EXPORT_SYMBOL(__grab_cache_page); | ||
2010 | |||
2011 | static ssize_t generic_perform_write_2copy(struct file *file, | ||
2012 | struct iov_iter *i, loff_t pos) | ||
2013 | { | ||
2014 | struct address_space *mapping = file->f_mapping; | ||
2015 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
2016 | struct inode *inode = mapping->host; | ||
2017 | long status = 0; | ||
2018 | ssize_t written = 0; | ||
1867 | 2019 | ||
1868 | do { | 2020 | do { |
1869 | unsigned long index; | 2021 | struct page *src_page; |
1870 | unsigned long offset; | 2022 | struct page *page; |
1871 | size_t copied; | 2023 | pgoff_t index; /* Pagecache index for current page */ |
2024 | unsigned long offset; /* Offset into pagecache page */ | ||
2025 | unsigned long bytes; /* Bytes to write to page */ | ||
2026 | size_t copied; /* Bytes copied from user */ | ||
1872 | 2027 | ||
1873 | offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ | 2028 | offset = (pos & (PAGE_CACHE_SIZE - 1)); |
1874 | index = pos >> PAGE_CACHE_SHIFT; | 2029 | index = pos >> PAGE_CACHE_SHIFT; |
1875 | bytes = PAGE_CACHE_SIZE - offset; | 2030 | bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, |
1876 | 2031 | iov_iter_count(i)); | |
1877 | /* Limit the size of the copy to the caller's write size */ | ||
1878 | bytes = min(bytes, count); | ||
1879 | 2032 | ||
1880 | /* We only need to worry about prefaulting when writes are from | 2033 | /* |
1881 | * user-space. NFSd uses vfs_writev with several non-aligned | 2034 | * a non-NULL src_page indicates that we're doing the |
1882 | * segments in the vector, and limiting to one segment a time is | 2035 | * copy via get_user_pages and kmap. |
1883 | * a noticeable performance for re-write | ||
1884 | */ | 2036 | */ |
1885 | if (!segment_eq(get_fs(), KERNEL_DS)) { | 2037 | src_page = NULL; |
1886 | /* | ||
1887 | * Limit the size of the copy to that of the current | ||
1888 | * segment, because fault_in_pages_readable() doesn't | ||
1889 | * know how to walk segments. | ||
1890 | */ | ||
1891 | bytes = min(bytes, cur_iov->iov_len - iov_base); | ||
1892 | 2038 | ||
1893 | /* | 2039 | /* |
1894 | * Bring in the user page that we will copy from | 2040 | * Bring in the user page that we will copy from _first_. |
1895 | * _first_. Otherwise there's a nasty deadlock on | 2041 | * Otherwise there's a nasty deadlock on copying from the |
1896 | * copying from the same page as we're writing to, | 2042 | * same page as we're writing to, without it being marked |
1897 | * without it being marked up-to-date. | 2043 | * up-to-date. |
1898 | */ | 2044 | * |
1899 | fault_in_pages_readable(buf, bytes); | 2045 | * Not only is this an optimisation, but it is also required |
2046 | * to check that the address is actually valid, when atomic | ||
2047 | * usercopies are used, below. | ||
2048 | */ | ||
2049 | if (unlikely(iov_iter_fault_in_readable(i, bytes))) { | ||
2050 | status = -EFAULT; | ||
2051 | break; | ||
1900 | } | 2052 | } |
1901 | page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); | 2053 | |
2054 | page = __grab_cache_page(mapping, index); | ||
1902 | if (!page) { | 2055 | if (!page) { |
1903 | status = -ENOMEM; | 2056 | status = -ENOMEM; |
1904 | break; | 2057 | break; |
1905 | } | 2058 | } |
1906 | 2059 | ||
1907 | if (unlikely(bytes == 0)) { | 2060 | /* |
1908 | status = 0; | 2061 | * non-uptodate pages cannot cope with short copies, and we |
1909 | copied = 0; | 2062 | * cannot take a pagefault with the destination page locked. |
1910 | goto zero_length_segment; | 2063 | * So pin the source page to copy it. |
1911 | } | 2064 | */ |
2065 | if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) { | ||
2066 | unlock_page(page); | ||
1912 | 2067 | ||
1913 | status = a_ops->prepare_write(file, page, offset, offset+bytes); | 2068 | src_page = alloc_page(GFP_KERNEL); |
1914 | if (unlikely(status)) { | 2069 | if (!src_page) { |
1915 | loff_t isize = i_size_read(inode); | 2070 | page_cache_release(page); |
2071 | status = -ENOMEM; | ||
2072 | break; | ||
2073 | } | ||
2074 | |||
2075 | /* | ||
2076 | * Cannot get_user_pages with a page locked for the | ||
2077 | * same reason as we can't take a page fault with a | ||
2078 | * page locked (as explained below). | ||
2079 | */ | ||
2080 | copied = iov_iter_copy_from_user(src_page, i, | ||
2081 | offset, bytes); | ||
2082 | if (unlikely(copied == 0)) { | ||
2083 | status = -EFAULT; | ||
2084 | page_cache_release(page); | ||
2085 | page_cache_release(src_page); | ||
2086 | break; | ||
2087 | } | ||
2088 | bytes = copied; | ||
1916 | 2089 | ||
1917 | if (status != AOP_TRUNCATED_PAGE) | 2090 | lock_page(page); |
2091 | /* | ||
2092 | * Can't handle the page going uptodate here, because | ||
2093 | * that means we would use non-atomic usercopies, which | ||
2094 | * zero out the tail of the page, which can cause | ||
2095 | * zeroes to become transiently visible. We could just | ||
2096 | * use a non-zeroing copy, but the APIs aren't too | ||
2097 | * consistent. | ||
2098 | */ | ||
2099 | if (unlikely(!page->mapping || PageUptodate(page))) { | ||
1918 | unlock_page(page); | 2100 | unlock_page(page); |
1919 | page_cache_release(page); | 2101 | page_cache_release(page); |
1920 | if (status == AOP_TRUNCATED_PAGE) | 2102 | page_cache_release(src_page); |
1921 | continue; | 2103 | continue; |
2104 | } | ||
2105 | } | ||
2106 | |||
2107 | status = a_ops->prepare_write(file, page, offset, offset+bytes); | ||
2108 | if (unlikely(status)) | ||
2109 | goto fs_write_aop_error; | ||
2110 | |||
2111 | if (!src_page) { | ||
1922 | /* | 2112 | /* |
1923 | * prepare_write() may have instantiated a few blocks | 2113 | * Must not enter the pagefault handler here, because |
1924 | * outside i_size. Trim these off again. | 2114 | * we hold the page lock, so we might recursively |
2115 | * deadlock on the same lock, or get an ABBA deadlock | ||
2116 | * against a different lock, or against the mmap_sem | ||
2117 | * (which nests outside the page lock). So increment | ||
2118 | * preempt count, and use _atomic usercopies. | ||
2119 | * | ||
2120 | * The page is uptodate so we are OK to encounter a | ||
2121 | * short copy: if unmodified parts of the page are | ||
2122 | * marked dirty and written out to disk, it doesn't | ||
2123 | * really matter. | ||
1925 | */ | 2124 | */ |
1926 | if (pos + bytes > isize) | 2125 | pagefault_disable(); |
1927 | vmtruncate(inode, isize); | 2126 | copied = iov_iter_copy_from_user_atomic(page, i, |
1928 | break; | 2127 | offset, bytes); |
2128 | pagefault_enable(); | ||
2129 | } else { | ||
2130 | void *src, *dst; | ||
2131 | src = kmap_atomic(src_page, KM_USER0); | ||
2132 | dst = kmap_atomic(page, KM_USER1); | ||
2133 | memcpy(dst + offset, src + offset, bytes); | ||
2134 | kunmap_atomic(dst, KM_USER1); | ||
2135 | kunmap_atomic(src, KM_USER0); | ||
2136 | copied = bytes; | ||
1929 | } | 2137 | } |
1930 | if (likely(nr_segs == 1)) | ||
1931 | copied = filemap_copy_from_user(page, offset, | ||
1932 | buf, bytes); | ||
1933 | else | ||
1934 | copied = filemap_copy_from_user_iovec(page, offset, | ||
1935 | cur_iov, iov_base, bytes); | ||
1936 | flush_dcache_page(page); | 2138 | flush_dcache_page(page); |
2139 | |||
1937 | status = a_ops->commit_write(file, page, offset, offset+bytes); | 2140 | status = a_ops->commit_write(file, page, offset, offset+bytes); |
1938 | if (status == AOP_TRUNCATED_PAGE) { | 2141 | if (unlikely(status < 0)) |
1939 | page_cache_release(page); | 2142 | goto fs_write_aop_error; |
1940 | continue; | 2143 | if (unlikely(status > 0)) /* filesystem did partial write */ |
1941 | } | 2144 | copied = min_t(size_t, copied, status); |
1942 | zero_length_segment: | 2145 | |
1943 | if (likely(copied >= 0)) { | ||
1944 | if (!status) | ||
1945 | status = copied; | ||
1946 | |||
1947 | if (status >= 0) { | ||
1948 | written += status; | ||
1949 | count -= status; | ||
1950 | pos += status; | ||
1951 | buf += status; | ||
1952 | if (unlikely(nr_segs > 1)) { | ||
1953 | filemap_set_next_iovec(&cur_iov, | ||
1954 | &iov_base, status); | ||
1955 | if (count) | ||
1956 | buf = cur_iov->iov_base + | ||
1957 | iov_base; | ||
1958 | } else { | ||
1959 | iov_base += status; | ||
1960 | } | ||
1961 | } | ||
1962 | } | ||
1963 | if (unlikely(copied != bytes)) | ||
1964 | if (status >= 0) | ||
1965 | status = -EFAULT; | ||
1966 | unlock_page(page); | 2146 | unlock_page(page); |
1967 | mark_page_accessed(page); | 2147 | mark_page_accessed(page); |
1968 | page_cache_release(page); | 2148 | page_cache_release(page); |
1969 | if (status < 0) | 2149 | if (src_page) |
1970 | break; | 2150 | page_cache_release(src_page); |
2151 | |||
2152 | iov_iter_advance(i, copied); | ||
2153 | pos += copied; | ||
2154 | written += copied; | ||
2155 | |||
1971 | balance_dirty_pages_ratelimited(mapping); | 2156 | balance_dirty_pages_ratelimited(mapping); |
1972 | cond_resched(); | 2157 | cond_resched(); |
1973 | } while (count); | 2158 | continue; |
1974 | *ppos = pos; | ||
1975 | 2159 | ||
1976 | if (cached_page) | 2160 | fs_write_aop_error: |
1977 | page_cache_release(cached_page); | 2161 | unlock_page(page); |
2162 | page_cache_release(page); | ||
2163 | if (src_page) | ||
2164 | page_cache_release(src_page); | ||
2165 | |||
2166 | /* | ||
2167 | * prepare_write() may have instantiated a few blocks | ||
2168 | * outside i_size. Trim these off again. Don't need | ||
2169 | * i_size_read because we hold i_mutex. | ||
2170 | */ | ||
2171 | if (pos + bytes > inode->i_size) | ||
2172 | vmtruncate(inode, inode->i_size); | ||
2173 | break; | ||
2174 | } while (iov_iter_count(i)); | ||
2175 | |||
2176 | return written ? written : status; | ||
2177 | } | ||
2178 | |||
2179 | static ssize_t generic_perform_write(struct file *file, | ||
2180 | struct iov_iter *i, loff_t pos) | ||
2181 | { | ||
2182 | struct address_space *mapping = file->f_mapping; | ||
2183 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
2184 | long status = 0; | ||
2185 | ssize_t written = 0; | ||
2186 | unsigned int flags = 0; | ||
1978 | 2187 | ||
1979 | /* | 2188 | /* |
1980 | * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC | 2189 | * Copies from kernel address space cannot fail (NFSD is a big user). |
1981 | */ | 2190 | */ |
2191 | if (segment_eq(get_fs(), KERNEL_DS)) | ||
2192 | flags |= AOP_FLAG_UNINTERRUPTIBLE; | ||
2193 | |||
2194 | do { | ||
2195 | struct page *page; | ||
2196 | pgoff_t index; /* Pagecache index for current page */ | ||
2197 | unsigned long offset; /* Offset into pagecache page */ | ||
2198 | unsigned long bytes; /* Bytes to write to page */ | ||
2199 | size_t copied; /* Bytes copied from user */ | ||
2200 | void *fsdata; | ||
2201 | |||
2202 | offset = (pos & (PAGE_CACHE_SIZE - 1)); | ||
2203 | index = pos >> PAGE_CACHE_SHIFT; | ||
2204 | bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, | ||
2205 | iov_iter_count(i)); | ||
2206 | |||
2207 | again: | ||
2208 | |||
2209 | /* | ||
2210 | * Bring in the user page that we will copy from _first_. | ||
2211 | * Otherwise there's a nasty deadlock on copying from the | ||
2212 | * same page as we're writing to, without it being marked | ||
2213 | * up-to-date. | ||
2214 | * | ||
2215 | * Not only is this an optimisation, but it is also required | ||
2216 | * to check that the address is actually valid, when atomic | ||
2217 | * usercopies are used, below. | ||
2218 | */ | ||
2219 | if (unlikely(iov_iter_fault_in_readable(i, bytes))) { | ||
2220 | status = -EFAULT; | ||
2221 | break; | ||
2222 | } | ||
2223 | |||
2224 | status = a_ops->write_begin(file, mapping, pos, bytes, flags, | ||
2225 | &page, &fsdata); | ||
2226 | if (unlikely(status)) | ||
2227 | break; | ||
2228 | |||
2229 | pagefault_disable(); | ||
2230 | copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); | ||
2231 | pagefault_enable(); | ||
2232 | flush_dcache_page(page); | ||
2233 | |||
2234 | status = a_ops->write_end(file, mapping, pos, bytes, copied, | ||
2235 | page, fsdata); | ||
2236 | if (unlikely(status < 0)) | ||
2237 | break; | ||
2238 | copied = status; | ||
2239 | |||
2240 | cond_resched(); | ||
2241 | |||
2242 | if (unlikely(copied == 0)) { | ||
2243 | /* | ||
2244 | * If we were unable to copy any data at all, we must | ||
2245 | * fall back to a single segment length write. | ||
2246 | * | ||
2247 | * If we didn't fallback here, we could livelock | ||
2248 | * because not all segments in the iov can be copied at | ||
2249 | * once without a pagefault. | ||
2250 | */ | ||
2251 | bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, | ||
2252 | iov_iter_single_seg_count(i)); | ||
2253 | goto again; | ||
2254 | } | ||
2255 | iov_iter_advance(i, copied); | ||
2256 | pos += copied; | ||
2257 | written += copied; | ||
2258 | |||
2259 | balance_dirty_pages_ratelimited(mapping); | ||
2260 | |||
2261 | } while (iov_iter_count(i)); | ||
2262 | |||
2263 | return written ? written : status; | ||
2264 | } | ||
2265 | |||
2266 | ssize_t | ||
2267 | generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | ||
2268 | unsigned long nr_segs, loff_t pos, loff_t *ppos, | ||
2269 | size_t count, ssize_t written) | ||
2270 | { | ||
2271 | struct file *file = iocb->ki_filp; | ||
2272 | struct address_space *mapping = file->f_mapping; | ||
2273 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
2274 | struct inode *inode = mapping->host; | ||
2275 | ssize_t status; | ||
2276 | struct iov_iter i; | ||
2277 | |||
2278 | iov_iter_init(&i, iov, nr_segs, count, written); | ||
2279 | if (a_ops->write_begin) | ||
2280 | status = generic_perform_write(file, &i, pos); | ||
2281 | else | ||
2282 | status = generic_perform_write_2copy(file, &i, pos); | ||
2283 | |||
1982 | if (likely(status >= 0)) { | 2284 | if (likely(status >= 0)) { |
2285 | written += status; | ||
2286 | *ppos = pos + status; | ||
2287 | |||
2288 | /* | ||
2289 | * For now, when the user asks for O_SYNC, we'll actually give | ||
2290 | * O_DSYNC | ||
2291 | */ | ||
1983 | if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2292 | if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) { |
1984 | if (!a_ops->writepage || !is_sync_kiocb(iocb)) | 2293 | if (!a_ops->writepage || !is_sync_kiocb(iocb)) |
1985 | status = generic_osync_inode(inode, mapping, | 2294 | status = generic_osync_inode(inode, mapping, |
@@ -1995,7 +2304,6 @@ zero_length_segment: | |||
1995 | if (unlikely(file->f_flags & O_DIRECT) && written) | 2304 | if (unlikely(file->f_flags & O_DIRECT) && written) |
1996 | status = filemap_write_and_wait(mapping); | 2305 | status = filemap_write_and_wait(mapping); |
1997 | 2306 | ||
1998 | pagevec_lru_add(&lru_pvec); | ||
1999 | return written ? written : status; | 2307 | return written ? written : status; |
2000 | } | 2308 | } |
2001 | EXPORT_SYMBOL(generic_file_buffered_write); | 2309 | EXPORT_SYMBOL(generic_file_buffered_write); |