author		Ingo Molnar <mingo@elte.hu>	2008-08-14 06:19:59 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-08-14 06:19:59 -0400
commit		8d7ccaa545490cdffdfaff0842436a8dd85cf47b (patch)
tree		8129b5907161bc6ae26deb3645ce1e280c5e1f51 /mm/filemap.c
parent		b2139aa0eec330c711c5a279db361e5ef1178e78 (diff)
parent		30a2f3c60a84092c8084dfe788b710f8d0768cd4 (diff)
Merge commit 'v2.6.27-rc3' into x86/prototypes
Conflicts:
	include/asm-x86/dma-mapping.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--	mm/filemap.c	422
1 file changed, 253 insertions, 169 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 65d9d9e2b755..54e968650855 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -42,9 +42,6 @@
 
 #include <asm/mman.h>
 
-static ssize_t
-generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-	loff_t offset, unsigned long nr_segs);
 
 /*
  * Shared mappings implemented 30.11.1994. It's not fully working yet,
@@ -112,13 +109,13 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 /*
  * Remove a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
- * is safe. The caller must hold a write_lock on the mapping's tree_lock.
+ * is safe. The caller must hold the mapping's tree_lock.
  */
 void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
-	mem_cgroup_uncharge_page(page);
+	mem_cgroup_uncharge_cache_page(page);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
@@ -144,9 +141,9 @@ void remove_from_page_cache(struct page *page)
 
 	BUG_ON(!PageLocked(page));
 
-	write_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 	__remove_from_page_cache(page);
-	write_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
 }
 
 static int sync_page(void *word)
@@ -445,48 +442,52 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 }
 
 /**
- * add_to_page_cache - add newly allocated pagecache pages
+ * add_to_page_cache_locked - add a locked page to the pagecache
  * @page: page to add
  * @mapping: the page's address_space
  * @offset: page index
  * @gfp_mask: page allocation mode
  *
- * This function is used to add newly allocated pagecache pages;
- * the page is new, so we can just run SetPageLocked() against it.
- * The other page state flags were set by rmqueue().
- *
+ * This function is used to add a page to the pagecache. It must be locked.
  * This function does not add the page to the LRU. The caller must do that.
  */
-int add_to_page_cache(struct page *page, struct address_space *mapping,
+int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		pgoff_t offset, gfp_t gfp_mask)
 {
-	int error = mem_cgroup_cache_charge(page, current->mm,
+	int error;
+
+	VM_BUG_ON(!PageLocked(page));
+
+	error = mem_cgroup_cache_charge(page, current->mm,
 					gfp_mask & ~__GFP_HIGHMEM);
 	if (error)
 		goto out;
 
 	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (error == 0) {
-		write_lock_irq(&mapping->tree_lock);
+		page_cache_get(page);
+		page->mapping = mapping;
+		page->index = offset;
+
+		spin_lock_irq(&mapping->tree_lock);
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
-		if (!error) {
-			page_cache_get(page);
-			SetPageLocked(page);
-			page->mapping = mapping;
-			page->index = offset;
+		if (likely(!error)) {
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
-		} else
-			mem_cgroup_uncharge_page(page);
+		} else {
+			page->mapping = NULL;
+			mem_cgroup_uncharge_cache_page(page);
+			page_cache_release(page);
+		}
 
-		write_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	} else
-		mem_cgroup_uncharge_page(page);
+		mem_cgroup_uncharge_cache_page(page);
 out:
 	return error;
 }
-EXPORT_SYMBOL(add_to_page_cache);
+EXPORT_SYMBOL(add_to_page_cache_locked);
 
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 				pgoff_t offset, gfp_t gfp_mask)
@@ -557,14 +558,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
  *
  * The first mb is necessary to safely close the critical section opened by the
- * TestSetPageLocked(), the second mb is necessary to enforce ordering between
- * the clear_bit and the read of the waitqueue (to avoid SMP races with a
- * parallel wait_on_page_locked()).
+ * test_and_set_bit() to lock the page; the second mb is necessary to enforce
+ * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
+ * races with a parallel wait_on_page_locked()).
  */
 void unlock_page(struct page *page)
 {
 	smp_mb__before_clear_bit();
-	if (!TestClearPageLocked(page))
+	if (!test_and_clear_bit(PG_locked, &page->flags))
 		BUG();
 	smp_mb__after_clear_bit();
 	wake_up_page(page, PG_locked);
@@ -636,15 +637,35 @@ void __lock_page_nosync(struct page *page)
  * Is there a pagecache struct page at the given (mapping, offset) tuple?
  * If yes, increment its refcount and return it; if no, return NULL.
  */
-struct page * find_get_page(struct address_space *mapping, pgoff_t offset)
+struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
 {
+	void **pagep;
 	struct page *page;
 
-	read_lock_irq(&mapping->tree_lock);
-	page = radix_tree_lookup(&mapping->page_tree, offset);
-	if (page)
-		page_cache_get(page);
-	read_unlock_irq(&mapping->tree_lock);
+	rcu_read_lock();
+repeat:
+	page = NULL;
+	pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
+	if (pagep) {
+		page = radix_tree_deref_slot(pagep);
+		if (unlikely(!page || page == RADIX_TREE_RETRY))
+			goto repeat;
+
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/*
+		 * Has the page moved?
+		 * This is part of the lockless pagecache protocol. See
+		 * include/linux/pagemap.h for details.
+		 */
+		if (unlikely(page != *pagep)) {
+			page_cache_release(page);
+			goto repeat;
+		}
+	}
+	rcu_read_unlock();
+
 	return page;
 }
 EXPORT_SYMBOL(find_get_page);
@@ -659,32 +680,22 @@ EXPORT_SYMBOL(find_get_page);
  *
  * Returns zero if the page was not present. find_lock_page() may sleep.
  */
-struct page *find_lock_page(struct address_space *mapping,
-				pgoff_t offset)
+struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
 {
 	struct page *page;
 
 repeat:
-	read_lock_irq(&mapping->tree_lock);
-	page = radix_tree_lookup(&mapping->page_tree, offset);
+	page = find_get_page(mapping, offset);
 	if (page) {
-		page_cache_get(page);
-		if (TestSetPageLocked(page)) {
-			read_unlock_irq(&mapping->tree_lock);
-			__lock_page(page);
-
-			/* Has the page been truncated while we slept? */
-			if (unlikely(page->mapping != mapping)) {
-				unlock_page(page);
-				page_cache_release(page);
-				goto repeat;
-			}
-			VM_BUG_ON(page->index != offset);
-			goto out;
+		lock_page(page);
+		/* Has the page been truncated? */
+		if (unlikely(page->mapping != mapping)) {
+			unlock_page(page);
+			page_cache_release(page);
+			goto repeat;
 		}
+		VM_BUG_ON(page->index != offset);
 	}
-	read_unlock_irq(&mapping->tree_lock);
-out:
 	return page;
 }
 EXPORT_SYMBOL(find_lock_page);
@@ -750,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 {
 	unsigned int i;
 	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+				(void ***)pages, start, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		/*
+		 * this can only trigger if nr_found == 1, making livelock
+		 * a non issue.
+		 */
+		if (unlikely(page == RADIX_TREE_RETRY))
+			goto restart;
 
-	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup(&mapping->page_tree,
-				(void **)pages, start, nr_pages);
-	for (i = 0; i < ret; i++)
-		page_cache_get(pages[i]);
-	read_unlock_irq(&mapping->tree_lock);
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+
+		pages[ret] = page;
+		ret++;
+	}
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -777,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 {
 	unsigned int i;
 	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+				(void ***)pages, index, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		/*
+		 * this can only trigger if nr_found == 1, making livelock
+		 * a non issue.
+		 */
+		if (unlikely(page == RADIX_TREE_RETRY))
+			goto restart;
 
-	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup(&mapping->page_tree,
-				(void **)pages, index, nr_pages);
-	for (i = 0; i < ret; i++) {
-		if (pages[i]->mapping == NULL || pages[i]->index != index)
+		if (page->mapping == NULL || page->index != index)
 			break;
 
-		page_cache_get(pages[i]);
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+
+		pages[ret] = page;
+		ret++;
 		index++;
 	}
-	read_unlock_irq(&mapping->tree_lock);
-	return i;
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL(find_get_pages_contig);
 
@@ -809,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 {
 	unsigned int i;
 	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
+				(void ***)pages, *index, nr_pages, tag);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		/*
+		 * this can only trigger if nr_found == 1, making livelock
+		 * a non issue.
+		 */
+		if (unlikely(page == RADIX_TREE_RETRY))
+			goto restart;
+
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+
+		pages[ret] = page;
+		ret++;
+	}
+	rcu_read_unlock();
 
-	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
-				(void **)pages, *index, nr_pages, tag);
-	for (i = 0; i < ret; i++)
-		page_cache_get(pages[i]);
 	if (ret)
 		*index = pages[ret - 1]->index + 1;
-	read_unlock_irq(&mapping->tree_lock);
+
 	return ret;
 }
 EXPORT_SYMBOL(find_get_pages_tag);
@@ -841,7 +931,7 @@ grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
 	struct page *page = find_get_page(mapping, index);
 
 	if (page) {
-		if (!TestSetPageLocked(page))
+		if (trylock_page(page))
 			return page;
 		page_cache_release(page);
 		return NULL;
@@ -933,8 +1023,17 @@ find_page:
 					ra, filp, page,
 					index, last_index - index);
 		}
-		if (!PageUptodate(page))
-			goto page_not_up_to_date;
+		if (!PageUptodate(page)) {
+			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
+					!mapping->a_ops->is_partially_uptodate)
+				goto page_not_up_to_date;
+			if (!trylock_page(page))
+				goto page_not_up_to_date;
+			if (!mapping->a_ops->is_partially_uptodate(page,
+								desc, offset))
+				goto page_not_up_to_date_locked;
+			unlock_page(page);
+		}
 page_ok:
 		/*
 		 * i_size must be checked after we know the page is Uptodate.
@@ -1004,6 +1103,7 @@ page_not_up_to_date:
 		if (lock_page_killable(page))
 			goto readpage_eio;
 
+page_not_up_to_date_locked:
 		/* Did it get truncated before we got the lock? */
 		if (!page->mapping) {
 			unlock_page(page);
@@ -1200,42 +1300,41 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 
 		mapping = filp->f_mapping;
 		inode = mapping->host;
-		retval = 0;
 		if (!count)
 			goto out; /* skip atime */
 		size = i_size_read(inode);
 		if (pos < size) {
-			retval = generic_file_direct_IO(READ, iocb,
-						iov, pos, nr_segs);
+			retval = filemap_write_and_wait(mapping);
+			if (!retval) {
+				retval = mapping->a_ops->direct_IO(READ, iocb,
+							iov, pos, nr_segs);
+			}
 			if (retval > 0)
 				*ppos = pos + retval;
-		}
-		if (likely(retval != 0)) {
-			file_accessed(filp);
-			goto out;
+			if (retval) {
+				file_accessed(filp);
+				goto out;
+			}
 		}
 	}
 
-	retval = 0;
-	if (count) {
-		for (seg = 0; seg < nr_segs; seg++) {
-			read_descriptor_t desc;
+	for (seg = 0; seg < nr_segs; seg++) {
+		read_descriptor_t desc;
 
-			desc.written = 0;
-			desc.arg.buf = iov[seg].iov_base;
-			desc.count = iov[seg].iov_len;
-			if (desc.count == 0)
-				continue;
-			desc.error = 0;
-			do_generic_file_read(filp,ppos,&desc,file_read_actor);
-			retval += desc.written;
-			if (desc.error) {
-				retval = retval ?: desc.error;
-				break;
-			}
-			if (desc.count > 0)
-				break;
+		desc.written = 0;
+		desc.arg.buf = iov[seg].iov_base;
+		desc.count = iov[seg].iov_len;
+		if (desc.count == 0)
+			continue;
+		desc.error = 0;
+		do_generic_file_read(filp, ppos, &desc, file_read_actor);
+		retval += desc.written;
+		if (desc.error) {
+			retval = retval ?: desc.error;
+			break;
 		}
+		if (desc.count > 0)
+			break;
 	}
 out:
 	return retval;
@@ -1669,8 +1768,9 @@ static int __remove_suid(struct dentry *dentry, int kill)
 	return notify_change(dentry, &newattrs);
 }
 
-int remove_suid(struct dentry *dentry)
+int file_remove_suid(struct file *file)
 {
+	struct dentry *dentry = file->f_path.dentry;
 	int killsuid = should_remove_suid(dentry);
 	int killpriv = security_inode_need_killpriv(dentry);
 	int error = 0;
@@ -1684,7 +1784,7 @@ int remove_suid(struct dentry *dentry)
 
 	return error;
 }
-EXPORT_SYMBOL(remove_suid);
+EXPORT_SYMBOL(file_remove_suid);
 
 static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
@@ -1779,7 +1879,7 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
 		 * The !iov->iov_len check ensures we skip over unlikely
 		 * zero-length segments (without overruning the iovec).
 		 */
-		while (bytes || unlikely(!iov->iov_len && i->count)) {
+		while (bytes || unlikely(i->count && !iov->iov_len)) {
 			int copy;
 
 			copy = min(bytes, iov->iov_len - base);
@@ -2004,11 +2104,55 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	ssize_t		written;
+	size_t		write_len;
+	pgoff_t		end;
 
 	if (count != ocount)
 		*nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
 
-	written = generic_file_direct_IO(WRITE, iocb, iov, pos, *nr_segs);
+	/*
+	 * Unmap all mmappings of the file up-front.
+	 *
+	 * This will cause any pte dirty bits to be propagated into the
+	 * pageframes for the subsequent filemap_write_and_wait().
+	 */
+	write_len = iov_length(iov, *nr_segs);
+	end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
+	if (mapping_mapped(mapping))
+		unmap_mapping_range(mapping, pos, write_len, 0);
+
+	written = filemap_write_and_wait(mapping);
+	if (written)
+		goto out;
+
+	/*
+	 * After a write we want buffered reads to be sure to go to disk to get
+	 * the new data. We invalidate clean cached page from the region we're
+	 * about to write. We do this *before* the write so that we can return
+	 * -EIO without clobbering -EIOCBQUEUED from ->direct_IO().
+	 */
+	if (mapping->nrpages) {
+		written = invalidate_inode_pages2_range(mapping,
+					pos >> PAGE_CACHE_SHIFT, end);
+		if (written)
+			goto out;
+	}
+
+	written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
+
+	/*
+	 * Finally, try again to invalidate clean pages which might have been
+	 * cached by non-direct readahead, or faulted in by get_user_pages()
+	 * if the source of the write was an mmap'ed region of the file
+	 * we're writing. Either one is a pretty crazy thing to do,
+	 * so we don't support it 100%. If this invalidation
+	 * fails, tough, the write still worked...
+	 */
+	if (mapping->nrpages) {
+		invalidate_inode_pages2_range(mapping,
+					pos >> PAGE_CACHE_SHIFT, end);
+	}
+
 	if (written > 0) {
 		loff_t end = pos + written;
 		if (end > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
@@ -2024,6 +2168,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	 * i_mutex is held, which protects generic_osync_inode() from
 	 * livelocking.  AIO O_DIRECT ops attempt to sync metadata here.
 	 */
+out:
 	if ((written >= 0 || written == -EIOCBQUEUED) &&
 	    ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 		int err = generic_osync_inode(inode, mapping, OSYNC_METADATA);
@@ -2395,7 +2540,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
 	if (count == 0)
 		goto out;
 
-	err = remove_suid(file->f_path.dentry);
+	err = file_remove_suid(file);
 	if (err)
 		goto out;
 
@@ -2511,66 +2656,6 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 }
 EXPORT_SYMBOL(generic_file_aio_write);
 
-/*
- * Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
- * went wrong during pagecache shootdown.
- */
-static ssize_t
-generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-	loff_t offset, unsigned long nr_segs)
-{
-	struct file *file = iocb->ki_filp;
-	struct address_space *mapping = file->f_mapping;
-	ssize_t retval;
-	size_t write_len;
-	pgoff_t end = 0; /* silence gcc */
-
-	/*
-	 * If it's a write, unmap all mmappings of the file up-front. This
-	 * will cause any pte dirty bits to be propagated into the pageframes
-	 * for the subsequent filemap_write_and_wait().
-	 */
-	if (rw == WRITE) {
-		write_len = iov_length(iov, nr_segs);
-		end = (offset + write_len - 1) >> PAGE_CACHE_SHIFT;
-		if (mapping_mapped(mapping))
-			unmap_mapping_range(mapping, offset, write_len, 0);
-	}
-
-	retval = filemap_write_and_wait(mapping);
-	if (retval)
-		goto out;
-
-	/*
-	 * After a write we want buffered reads to be sure to go to disk to get
-	 * the new data. We invalidate clean cached page from the region we're
-	 * about to write. We do this *before* the write so that we can return
-	 * -EIO without clobbering -EIOCBQUEUED from ->direct_IO().
-	 */
-	if (rw == WRITE && mapping->nrpages) {
-		retval = invalidate_inode_pages2_range(mapping,
-					offset >> PAGE_CACHE_SHIFT, end);
-		if (retval)
-			goto out;
-	}
-
-	retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs);
-
-	/*
-	 * Finally, try again to invalidate clean pages which might have been
-	 * cached by non-direct readahead, or faulted in by get_user_pages()
-	 * if the source of the write was an mmap'ed region of the file
-	 * we're writing. Either one is a pretty crazy thing to do,
-	 * so we don't support it 100%. If this invalidation
-	 * fails, tough, the write still worked...
-	 */
-	if (rw == WRITE && mapping->nrpages) {
-		invalidate_inode_pages2_range(mapping, offset >> PAGE_CACHE_SHIFT, end);
-	}
-out:
-	return retval;
-}
-
 /**
  * try_to_release_page() - release old fs-specific metadata on a page
  *
@@ -2582,9 +2667,8 @@ out:
  * Otherwise return zero.
  *
  * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
+ * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
  *
- * NOTE: @gfp_mask may go away, and this function may become non-blocking.
  */
 int try_to_release_page(struct page *page, gfp_t gfp_mask)
 {