author     Ingo Molnar <mingo@elte.hu>  2009-04-07 05:15:40 -0400
committer  Ingo Molnar <mingo@elte.hu>  2009-04-07 05:15:40 -0400
commit     5e34437840d33554f69380584311743b39e8fbeb (patch)
tree       e081135619ee146af5efb9ee883afca950df5757 /fs/buffer.c
parent     77d05632baee21b1cef8730d7c06aa69601e4dca (diff)
parent     d508afb437daee7cf07da085b635c44a4ebf9b38 (diff)

Merge branch 'linus' into core/softlockup

Conflicts:
	kernel/sysctl.c
Diffstat (limited to 'fs/buffer.c')

 -rw-r--r--  fs/buffer.c | 255
 1 files changed, 78 insertions, 177 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 665d446b25bc..6e35762b6169 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -166,151 +166,6 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 }
 
 /*
- * Write out and wait upon all the dirty data associated with a block
- * device via its mapping. Does not take the superblock lock.
- */
-int sync_blockdev(struct block_device *bdev)
-{
-	int ret = 0;
-
-	if (bdev)
-		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-	return ret;
-}
-EXPORT_SYMBOL(sync_blockdev);
-
-/*
- * Write out and wait upon all dirty data associated with this
- * device. Filesystem data as well as the underlying block
- * device. Takes the superblock lock.
- */
-int fsync_bdev(struct block_device *bdev)
-{
-	struct super_block *sb = get_super(bdev);
-	if (sb) {
-		int res = fsync_super(sb);
-		drop_super(sb);
-		return res;
-	}
-	return sync_blockdev(bdev);
-}
-
-/**
- * freeze_bdev -- lock a filesystem and force it into a consistent state
- * @bdev:	blockdevice to lock
- *
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
- * If a superblock is found on this device, we take the s_umount semaphore
- * on it to make sure nobody unmounts until the snapshot creation is done.
- * The reference counter (bd_fsfreeze_count) guarantees that only the last
- * unfreeze process can unfreeze the frozen filesystem actually when multiple
- * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
- * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
- * actually.
- */
-struct super_block *freeze_bdev(struct block_device *bdev)
-{
-	struct super_block *sb;
-	int error = 0;
-
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (bdev->bd_fsfreeze_count > 0) {
-		bdev->bd_fsfreeze_count++;
-		sb = get_super(bdev);
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return sb;
-	}
-	bdev->bd_fsfreeze_count++;
-
-	down(&bdev->bd_mount_sem);
-	sb = get_super(bdev);
-	if (sb && !(sb->s_flags & MS_RDONLY)) {
-		sb->s_frozen = SB_FREEZE_WRITE;
-		smp_wmb();
-
-		__fsync_super(sb);
-
-		sb->s_frozen = SB_FREEZE_TRANS;
-		smp_wmb();
-
-		sync_blockdev(sb->s_bdev);
-
-		if (sb->s_op->freeze_fs) {
-			error = sb->s_op->freeze_fs(sb);
-			if (error) {
-				printk(KERN_ERR
-					"VFS:Filesystem freeze failed\n");
-				sb->s_frozen = SB_UNFROZEN;
-				drop_super(sb);
-				up(&bdev->bd_mount_sem);
-				bdev->bd_fsfreeze_count--;
-				mutex_unlock(&bdev->bd_fsfreeze_mutex);
-				return ERR_PTR(error);
-			}
-		}
-	}
-
-	sync_blockdev(bdev);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
-
-	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
-}
-EXPORT_SYMBOL(freeze_bdev);
-
-/**
- * thaw_bdev -- unlock filesystem
- * @bdev:	blockdevice to unlock
- * @sb:		associated superblock
- *
- * Unlocks the filesystem and marks it writeable again after freeze_bdev().
- */
-int thaw_bdev(struct block_device *bdev, struct super_block *sb)
-{
-	int error = 0;
-
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (!bdev->bd_fsfreeze_count) {
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return -EINVAL;
-	}
-
-	bdev->bd_fsfreeze_count--;
-	if (bdev->bd_fsfreeze_count > 0) {
-		if (sb)
-			drop_super(sb);
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return 0;
-	}
-
-	if (sb) {
-		BUG_ON(sb->s_bdev != bdev);
-		if (!(sb->s_flags & MS_RDONLY)) {
-			if (sb->s_op->unfreeze_fs) {
-				error = sb->s_op->unfreeze_fs(sb);
-				if (error) {
-					printk(KERN_ERR
-						"VFS:Filesystem thaw failed\n");
-					sb->s_frozen = SB_FREEZE_TRANS;
-					bdev->bd_fsfreeze_count++;
-					mutex_unlock(&bdev->bd_fsfreeze_mutex);
-					return error;
-				}
-			}
-			sb->s_frozen = SB_UNFROZEN;
-			smp_wmb();
-			wake_up(&sb->s_wait_unfrozen);
-		}
-		drop_super(sb);
-	}
-
-	up(&bdev->bd_mount_sem);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
-	return 0;
-}
-EXPORT_SYMBOL(thaw_bdev);
-
-/*
  * Various filesystems appear to want __find_get_block to be non-blocking.
  * But it's the page lock which protects the buffers. To get around this,
  * we get exclusion from try_to_free_buffers with the blockdev mapping's
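For reference: the kernel-doc removed above describes freeze_bdev()/thaw_bdev() as a refcounted pair in which only the last thaw actually unfreezes the filesystem. A minimal caller sketch using just the two signatures shown in this hunk (the function name and the snapshot step are illustrative placeholders, not part of this commit):

	/* Hypothetical caller, for illustration only -- not in this commit. */
	static int snapshot_frozen(struct block_device *bdev)
	{
		struct super_block *sb;

		sb = freeze_bdev(bdev);		/* counts bd_fsfreeze_count up, syncs and freezes */
		if (IS_ERR(sb))
			return PTR_ERR(sb);	/* ->freeze_fs failed */

		/* ... take the block-level snapshot while writes are held off ... */

		return thaw_bdev(bdev, sb);	/* counts down; the last caller unfreezes */
	}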
@@ -344,13 +199,13 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	head = page_buffers(page);
 	bh = head;
 	do {
-		if (bh->b_blocknr == block) {
+		if (!buffer_mapped(bh))
+			all_mapped = 0;
+		else if (bh->b_blocknr == block) {
 			ret = bh;
 			get_bh(bh);
 			goto out_unlock;
 		}
-		if (!buffer_mapped(bh))
-			all_mapped = 0;
 		bh = bh->b_this_page;
 	} while (bh != head);
 
@@ -435,7 +290,7 @@ static void free_more_memory(void)
 						&zone);
 		if (zone)
 			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
-						GFP_NOFS);
+						GFP_NOFS, NULL);
 	}
 }
 
@@ -692,6 +547,39 @@ repeat:
 	return err;
 }
 
+void do_thaw_all(unsigned long unused)
+{
+	struct super_block *sb;
+	char b[BDEVNAME_SIZE];
+
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
+			printk(KERN_WARNING "Emergency Thaw on %s\n",
+			       bdevname(sb->s_bdev, b));
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
+	}
+	spin_unlock(&sb_lock);
+	printk(KERN_WARNING "Emergency Thaw complete\n");
+}
+
+/**
+ * emergency_thaw_all -- forcibly thaw every frozen filesystem
+ *
+ * Used for emergency unfreeze of all filesystems via SysRq
+ */
+void emergency_thaw_all(void)
+{
+	pdflush_operation(do_thaw_all, 0);
+}
+
 /**
  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
  * @mapping: the mapping which wants those buffers written
@@ -760,32 +648,18 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  * If warn is true, then emit a warning if the page is not uptodate and has
  * not been truncated.
  */
-static int __set_page_dirty(struct page *page,
+static void __set_page_dirty(struct page *page,
 		struct address_space *mapping, int warn)
 {
-	if (unlikely(!mapping))
-		return !TestSetPageDirty(page);
-
-	if (TestSetPageDirty(page))
-		return 0;
-
 	spin_lock_irq(&mapping->tree_lock);
 	if (page->mapping) {	/* Race with truncate? */
 		WARN_ON_ONCE(warn && !PageUptodate(page));
-
-		if (mapping_cap_account_dirty(mapping)) {
-			__inc_zone_page_state(page, NR_FILE_DIRTY);
-			__inc_bdi_stat(mapping->backing_dev_info,
-					BDI_RECLAIMABLE);
-			task_io_account_write(PAGE_CACHE_SIZE);
-		}
+		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
 	spin_unlock_irq(&mapping->tree_lock);
 	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-
-	return 1;
 }
 
 /*
@@ -815,6 +689,7 @@ static int __set_page_dirty(struct page *page,
  */
 int __set_page_dirty_buffers(struct page *page)
 {
+	int newly_dirty;
 	struct address_space *mapping = page_mapping(page);
 
 	if (unlikely(!mapping))
@@ -830,9 +705,12 @@ int __set_page_dirty_buffers(struct page *page)
 			bh = bh->b_this_page;
 		} while (bh != head);
 	}
+	newly_dirty = !TestSetPageDirty(page);
 	spin_unlock(&mapping->private_lock);
 
-	return __set_page_dirty(page, mapping, 1);
+	if (newly_dirty)
+		__set_page_dirty(page, mapping, 1);
+	return newly_dirty;
 }
 EXPORT_SYMBOL(__set_page_dirty_buffers);
 
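Since __set_page_dirty() no longer returns a value or performs the TestSetPageDirty() itself, each caller now owns the PG_dirty test-and-set and only reports a clean-to-dirty transition. A file-local sketch of the new calling convention, mirroring the call sites in this diff (the helper name is illustrative, not from this commit):

	/* Illustrative only -- this is the pattern the call sites in this diff follow. */
	static void mark_page_dirty_once(struct page *page)
	{
		if (!TestSetPageDirty(page))
			__set_page_dirty(page, page_mapping(page), 0);
	}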
@@ -859,7 +737,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 {
 	struct buffer_head *bh;
 	struct list_head tmp;
-	struct address_space *mapping;
+	struct address_space *mapping, *prev_mapping = NULL;
 	int err = 0, err2;
 
 	INIT_LIST_HEAD(&tmp);
@@ -884,7 +762,18 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * contents - it is a noop if I/O is still in
 				 * flight on potentially older contents.
 				 */
-				ll_rw_block(SWRITE_SYNC, 1, &bh);
+				ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+
+				/*
+				 * Kick off IO for the previous mapping. Note
+				 * that we will not run the very last mapping,
+				 * wait_on_buffer() will do that for us
+				 * through sync_buffer().
+				 */
+				if (prev_mapping && prev_mapping != mapping)
+					blk_run_address_space(prev_mapping);
+				prev_mapping = mapping;
+
 				brelse(bh);
 				spin_lock(lock);
 			}
@@ -1261,8 +1150,11 @@ void mark_buffer_dirty(struct buffer_head *bh)
 			return;
 	}
 
-	if (!test_set_buffer_dirty(bh))
-		__set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
+	if (!test_set_buffer_dirty(bh)) {
+		struct page *page = bh->b_page;
+		if (!TestSetPageDirty(page))
+			__set_page_dirty(page, page_mapping(page), 0);
+	}
 }
 
 /*
@@ -1714,6 +1606,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	struct buffer_head *bh, *head;
 	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;
+	int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 
 	BUG_ON(!PageLocked(page));
 
@@ -1805,7 +1698,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
-			submit_bh(WRITE, bh);
+			submit_bh(write_op, bh);
 			nr_underway++;
 		}
 		bh = next;
@@ -1859,7 +1752,7 @@ recover:
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			clear_buffer_dirty(bh);
-			submit_bh(WRITE, bh);
+			submit_bh(write_op, bh);
 			nr_underway++;
 		}
 		bh = next;
@@ -2465,13 +2358,14 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
  * unlock the page.
  */
 int
-block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 		get_block_t get_block)
 {
+	struct page *page = vmf->page;
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	unsigned long end;
 	loff_t size;
-	int ret = -EINVAL;
+	int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
 
 	lock_page(page);
 	size = i_size_read(inode);
@@ -2491,6 +2385,13 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
 	if (!ret)
 		ret = block_commit_write(page, 0, end);
 
+	if (unlikely(ret)) {
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else /* -ENOSPC, -EIO, etc */
+			ret = VM_FAULT_SIGBUS;
+	}
+
 out_unlock:
 	unlock_page(page);
 	return ret;
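With block_page_mkwrite() now taking a struct vm_fault and returning VM_FAULT_* codes, the usual filesystem glue looks roughly like the sketch below (the myfs_* names and the get_block callback are placeholders, not part of this commit):

	/* Hypothetical wiring, for illustration only. */
	static int myfs_get_block(struct inode *inode, sector_t iblock,
				  struct buffer_head *bh_result, int create);

	static int myfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		return block_page_mkwrite(vma, vmf, myfs_get_block);
	}

	static struct vm_operations_struct myfs_file_vm_ops = {
		.fault		= filemap_fault,
		.page_mkwrite	= myfs_page_mkwrite,
	};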
@@ -3067,12 +2968,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 	for (i = 0; i < nr; i++) {
 		struct buffer_head *bh = bhs[i];
 
-		if (rw == SWRITE || rw == SWRITE_SYNC)
+		if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
 			lock_buffer(bh);
 		else if (!trylock_buffer(bh))
 			continue;
 
-		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
+		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
+		    rw == SWRITE_SYNC_PLUG) {
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
 				get_bh(bh);
@@ -3426,7 +3328,6 @@ EXPORT_SYMBOL(cont_write_begin);
 EXPORT_SYMBOL(end_buffer_read_sync);
 EXPORT_SYMBOL(end_buffer_write_sync);
 EXPORT_SYMBOL(file_fsync);
-EXPORT_SYMBOL(fsync_bdev);
 EXPORT_SYMBOL(generic_block_bmap);
 EXPORT_SYMBOL(generic_cont_expand_simple);
 EXPORT_SYMBOL(init_buffer);