path: root/fs/buffer.c
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--	fs/buffer.c	117
1 files changed, 62 insertions, 55 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 2219a76e2caf..698c6b2cc462 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,6 +41,7 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
+#include <linux/cleancache.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
@@ -54,23 +55,15 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 }
 EXPORT_SYMBOL(init_buffer);
 
-static int sync_buffer(void *word)
+static int sleep_on_buffer(void *word)
 {
-	struct block_device *bd;
-	struct buffer_head *bh
-		= container_of(word, struct buffer_head, b_state);
-
-	smp_mb();
-	bd = bh->b_bdev;
-	if (bd)
-		blk_run_address_space(bd->bd_inode->i_mapping);
 	io_schedule();
 	return 0;
 }
 
 void __lock_buffer(struct buffer_head *bh)
 {
-	wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer,
+	wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
 							TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_buffer);
@@ -90,7 +83,7 @@ EXPORT_SYMBOL(unlock_buffer);
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
-	wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__wait_on_buffer);
 
@@ -277,6 +270,10 @@ void invalidate_bdev(struct block_device *bdev)
 	invalidate_bh_lrus();
 	lru_add_drain_all();	/* make sure all lru add caches are flushed */
 	invalidate_mapping_pages(mapping, 0, -1);
+	/* 99% of the time, we don't need to flush the cleancache on the bdev.
+	 * But, for the strange corners, lets be cautious
+	 */
+	cleancache_flush_inode(mapping);
 }
 EXPORT_SYMBOL(invalidate_bdev);
 
@@ -749,10 +746,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 {
 	struct buffer_head *bh;
 	struct list_head tmp;
-	struct address_space *mapping, *prev_mapping = NULL;
+	struct address_space *mapping;
 	int err = 0, err2;
+	struct blk_plug plug;
 
 	INIT_LIST_HEAD(&tmp);
+	blk_start_plug(&plug);
 
 	spin_lock(lock);
 	while (!list_empty(list)) {
@@ -775,7 +774,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * still in flight on potentially older
 				 * contents.
 				 */
-				write_dirty_buffer(bh, WRITE_SYNC_PLUG);
+				write_dirty_buffer(bh, WRITE_SYNC);
 
 				/*
 				 * Kick off IO for the previous mapping. Note
@@ -783,16 +782,16 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * wait_on_buffer() will do that for us
 				 * through sync_buffer().
 				 */
-				if (prev_mapping && prev_mapping != mapping)
-					blk_run_address_space(prev_mapping);
-				prev_mapping = mapping;
-
 				brelse(bh);
 				spin_lock(lock);
 			}
 		}
 	}
 
+	spin_unlock(lock);
+	blk_finish_plug(&plug);
+	spin_lock(lock);
+
 	while (!list_empty(&tmp)) {
 		bh = BH_ENTRY(tmp.prev);
 		get_bh(bh);
@@ -1144,7 +1143,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and the global inode_lock.
+ * mapping->tree_lock and mapping->host->i_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {
@@ -1614,14 +1613,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
  * prevents this contention from occurring.
  *
  * If block_write_full_page() is called with wbc->sync_mode ==
- * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
- * causes the writes to be flagged as synchronous writes, but the
- * block device queue will NOT be unplugged, since usually many pages
- * will be pushed to the out before the higher-level caller actually
- * waits for the writes to be completed.  The various wait functions,
- * such as wait_on_writeback_range() will ultimately call sync_page()
- * which will ultimately call blk_run_backing_dev(), which will end up
- * unplugging the device queue.
+ * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
+ * causes the writes to be flagged as synchronous writes.
  */
 static int __block_write_full_page(struct inode *inode, struct page *page,
 		get_block_t *get_block, struct writeback_control *wbc,
@@ -1634,7 +1627,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;
 	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
-			WRITE_SYNC_PLUG : WRITE);
+			WRITE_SYNC : WRITE);
 
 	BUG_ON(!PageLocked(page));
 
@@ -2343,24 +2336,26 @@ EXPORT_SYMBOL(block_commit_write);
  * page lock we can determine safely if the page is beyond EOF. If it is not
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
+ *
+ * Direct callers of this function should call vfs_check_frozen() so that page
+ * fault does not busyloop until the fs is thawed.
  */
-int
-block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
-		   get_block_t get_block)
+int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
+			 get_block_t get_block)
 {
 	struct page *page = vmf->page;
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	unsigned long end;
 	loff_t size;
-	int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
+	int ret;
 
 	lock_page(page);
 	size = i_size_read(inode);
 	if ((page->mapping != inode->i_mapping) ||
 	    (page_offset(page) > size)) {
-		/* page got truncated out from underneath us */
-		unlock_page(page);
-		goto out;
+		/* We overload EFAULT to mean page got truncated */
+		ret = -EFAULT;
+		goto out_unlock;
 	}
 
 	/* page is wholly or partially inside EOF */
@@ -2373,18 +2368,41 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (!ret)
 		ret = block_commit_write(page, 0, end);
 
-	if (unlikely(ret)) {
-		unlock_page(page);
-		if (ret == -ENOMEM)
-			ret = VM_FAULT_OOM;
-		else /* -ENOSPC, -EIO, etc */
-			ret = VM_FAULT_SIGBUS;
-	} else
-		ret = VM_FAULT_LOCKED;
-
-out:
+	if (unlikely(ret < 0))
+		goto out_unlock;
+	/*
+	 * Freezing in progress? We check after the page is marked dirty and
+	 * with page lock held so if the test here fails, we are sure freezing
+	 * code will wait during syncing until the page fault is done - at that
+	 * point page will be dirty and unlocked so freezing code will write it
+	 * and writeprotect it again.
+	 */
+	set_page_dirty(page);
+	if (inode->i_sb->s_frozen != SB_UNFROZEN) {
+		ret = -EAGAIN;
+		goto out_unlock;
+	}
+	return 0;
+out_unlock:
+	unlock_page(page);
 	return ret;
 }
+EXPORT_SYMBOL(__block_page_mkwrite);
+
+int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
+		       get_block_t get_block)
+{
+	int ret;
+	struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
+
+	/*
+	 * This check is racy but catches the common case. The check in
+	 * __block_page_mkwrite() is reliable.
+	 */
+	vfs_check_frozen(sb, SB_FREEZE_WRITE);
+	ret = __block_page_mkwrite(vma, vmf, get_block);
+	return block_page_mkwrite_return(ret);
+}
 EXPORT_SYMBOL(block_page_mkwrite);
 
 /*
@@ -3138,17 +3156,6 @@ out:
 }
 EXPORT_SYMBOL(try_to_free_buffers);
 
-void block_sync_page(struct page *page)
-{
-	struct address_space *mapping;
-
-	smp_mb();
-	mapping = page_mapping(page);
-	if (mapping)
-		blk_run_backing_dev(mapping->backing_dev_info, page);
-}
-EXPORT_SYMBOL(block_sync_page);
-
 /*
  * There are no bdflush tunables left.  But distributions are
  * still running obsolete flush daemons, so we terminate them here.
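
For reference, a minimal sketch (not part of this commit) of how a direct caller of the new __block_page_mkwrite() helper is expected to pair it with the racy vfs_check_frozen() check and block_page_mkwrite_return(), as the comments added above describe. The names myfs_page_mkwrite and myfs_get_block are hypothetical placeholders for a filesystem's ->page_mkwrite handler and get_block_t callback; a filesystem with no extra locking needs could simply use block_page_mkwrite() itself.

/* Hypothetical get_block_t callback; real mapping logic is fs-specific. */
static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create)
{
	/* filesystem-specific block mapping would go here */
	return -EIO;
}

/* Hypothetical ->page_mkwrite handler wrapping __block_page_mkwrite(). */
static int myfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
	int ret;

	/* Racy check so the page fault does not busyloop on a frozen fs. */
	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
	/* Does the real work; returns 0 with the page locked and dirty. */
	ret = __block_page_mkwrite(vma, vmf, myfs_get_block);
	/* Map the errno (-EFAULT, -EAGAIN, ...) to a VM_FAULT_* code. */
	return block_page_mkwrite_return(ret);
}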