author		Ingo Molnar <mingo@elte.hu>	2009-04-07 05:15:40 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-04-07 05:15:40 -0400
commit		5e34437840d33554f69380584311743b39e8fbeb (patch)
tree		e081135619ee146af5efb9ee883afca950df5757 /fs/buffer.c
parent		77d05632baee21b1cef8730d7c06aa69601e4dca (diff)
parent		d508afb437daee7cf07da085b635c44a4ebf9b38 (diff)
Merge branch 'linus' into core/softlockup
Conflicts:
kernel/sysctl.c
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--	fs/buffer.c	255
1 file changed, 78 insertions(+), 177 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 665d446b25bc..6e35762b6169 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -166,151 +166,6 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 }
 
 /*
- * Write out and wait upon all the dirty data associated with a block
- * device via its mapping. Does not take the superblock lock.
- */
-int sync_blockdev(struct block_device *bdev)
-{
-	int ret = 0;
-
-	if (bdev)
-		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-	return ret;
-}
-EXPORT_SYMBOL(sync_blockdev);
-
-/*
- * Write out and wait upon all dirty data associated with this
- * device. Filesystem data as well as the underlying block
- * device. Takes the superblock lock.
- */
-int fsync_bdev(struct block_device *bdev)
-{
-	struct super_block *sb = get_super(bdev);
-	if (sb) {
-		int res = fsync_super(sb);
-		drop_super(sb);
-		return res;
-	}
-	return sync_blockdev(bdev);
-}
-
-/**
- * freeze_bdev -- lock a filesystem and force it into a consistent state
- * @bdev:	blockdevice to lock
- *
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
- * If a superblock is found on this device, we take the s_umount semaphore
- * on it to make sure nobody unmounts until the snapshot creation is done.
- * The reference counter (bd_fsfreeze_count) guarantees that only the last
- * unfreeze process can unfreeze the frozen filesystem actually when multiple
- * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
- * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
- * actually.
- */
-struct super_block *freeze_bdev(struct block_device *bdev)
-{
-	struct super_block *sb;
-	int error = 0;
-
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (bdev->bd_fsfreeze_count > 0) {
-		bdev->bd_fsfreeze_count++;
-		sb = get_super(bdev);
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return sb;
-	}
-	bdev->bd_fsfreeze_count++;
-
-	down(&bdev->bd_mount_sem);
-	sb = get_super(bdev);
-	if (sb && !(sb->s_flags & MS_RDONLY)) {
-		sb->s_frozen = SB_FREEZE_WRITE;
-		smp_wmb();
-
-		__fsync_super(sb);
-
-		sb->s_frozen = SB_FREEZE_TRANS;
-		smp_wmb();
-
-		sync_blockdev(sb->s_bdev);
-
-		if (sb->s_op->freeze_fs) {
-			error = sb->s_op->freeze_fs(sb);
-			if (error) {
-				printk(KERN_ERR
-					"VFS:Filesystem freeze failed\n");
-				sb->s_frozen = SB_UNFROZEN;
-				drop_super(sb);
-				up(&bdev->bd_mount_sem);
-				bdev->bd_fsfreeze_count--;
-				mutex_unlock(&bdev->bd_fsfreeze_mutex);
-				return ERR_PTR(error);
-			}
-		}
-	}
-
-	sync_blockdev(bdev);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
-
-	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
-}
-EXPORT_SYMBOL(freeze_bdev);
-
-/**
- * thaw_bdev -- unlock filesystem
- * @bdev:	blockdevice to unlock
- * @sb:		associated superblock
- *
- * Unlocks the filesystem and marks it writeable again after freeze_bdev().
- */
-int thaw_bdev(struct block_device *bdev, struct super_block *sb)
-{
-	int error = 0;
-
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (!bdev->bd_fsfreeze_count) {
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return -EINVAL;
-	}
-
-	bdev->bd_fsfreeze_count--;
-	if (bdev->bd_fsfreeze_count > 0) {
-		if (sb)
-			drop_super(sb);
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return 0;
-	}
-
-	if (sb) {
-		BUG_ON(sb->s_bdev != bdev);
-		if (!(sb->s_flags & MS_RDONLY)) {
-			if (sb->s_op->unfreeze_fs) {
-				error = sb->s_op->unfreeze_fs(sb);
-				if (error) {
-					printk(KERN_ERR
-						"VFS:Filesystem thaw failed\n");
-					sb->s_frozen = SB_FREEZE_TRANS;
-					bdev->bd_fsfreeze_count++;
-					mutex_unlock(&bdev->bd_fsfreeze_mutex);
-					return error;
-				}
-			}
-			sb->s_frozen = SB_UNFROZEN;
-			smp_wmb();
-			wake_up(&sb->s_wait_unfrozen);
-		}
-		drop_super(sb);
-	}
-
-	up(&bdev->bd_mount_sem);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
-	return 0;
-}
-EXPORT_SYMBOL(thaw_bdev);
-
-/*
  * Various filesystems appear to want __find_get_block to be non-blocking.
  * But it's the page lock which protects the buffers. To get around this,
  * we get exclusion from try_to_free_buffers with the blockdev mapping's
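
The hunk above removes the block-device sync/freeze helpers from fs/buffer.c wholesale; on the 'linus' side of this merge they appear to have been relocated out of buffer.c (fs/block_dev.c in this era) rather than deleted, which is also why EXPORT_SYMBOL(fsync_bdev) disappears from the export list at the bottom of this diff. For reference, a minimal sketch of how a snapshot-style caller pairs these helpers, following the freeze_bdev() kerneldoc above; take_snapshot() is a hypothetical placeholder, not a kernel API:

	/* Sketch only: freeze/thaw pairing per the freeze_bdev() kerneldoc.
	 * take_snapshot() is hypothetical; error handling is minimal. */
	struct super_block *sb;

	sb = freeze_bdev(bdev);		/* bumps bd_fsfreeze_count, syncs, freezes */
	if (IS_ERR(sb))
		return PTR_ERR(sb);
	take_snapshot(bdev);		/* device is quiescent and consistent here */
	thaw_bdev(bdev, sb);		/* counts down; unfreezes when it reaches 0 */
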
@@ -344,13 +199,13 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	head = page_buffers(page);
 	bh = head;
 	do {
-		if (bh->b_blocknr == block) {
+		if (!buffer_mapped(bh))
+			all_mapped = 0;
+		else if (bh->b_blocknr == block) {
 			ret = bh;
 			get_bh(bh);
 			goto out_unlock;
 		}
-		if (!buffer_mapped(bh))
-			all_mapped = 0;
 		bh = bh->b_this_page;
 	} while (bh != head);
 
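
The reordering above reads like a robustness fix: buffer_mapped() is now tested before b_blocknr. Clearing a buffer's mapped state does not clear b_blocknr, so an unmapped buffer_head can still carry a stale block number that happens to equal the block being looked up, and with the old order the loop could, at least in principle, hand back that stale buffer. Condensed shape of the fixed loop, with the invariant spelled out:

	/* Condensed from the hunk above: unmapped buffers are disqualified
	 * before b_blocknr is ever consulted, so a stale block number left
	 * behind in an unmapped buffer_head can no longer satisfy the lookup. */
	do {
		if (!buffer_mapped(bh))
			all_mapped = 0;
		else if (bh->b_blocknr == block) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
	} while (bh != head);
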
@@ -435,7 +290,7 @@ static void free_more_memory(void)
 						&zone);
 		if (zone)
 			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
-						GFP_NOFS);
+						GFP_NOFS, NULL);
 	}
 }
 
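
The extra NULL argument tracks a signature change merged from the 'linus' side: try_to_free_pages() grew a nodemask parameter that restricts direct reclaim to a set of nodes, with NULL preserving the old any-node behaviour. The prototype this call site compiles against should be, approximately:

	/* Assumed prototype (mm/vmscan.c on the 'linus' side): the new
	 * trailing nodemask limits direct reclaim; NULL means no limit. */
	unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
					gfp_t gfp_mask, nodemask_t *nodemask);
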
@@ -692,6 +547,39 @@ repeat:
 	return err;
 }
 
+void do_thaw_all(unsigned long unused)
+{
+	struct super_block *sb;
+	char b[BDEVNAME_SIZE];
+
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
+			printk(KERN_WARNING "Emergency Thaw on %s\n",
+			       bdevname(sb->s_bdev, b));
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
+	}
+	spin_unlock(&sb_lock);
+	printk(KERN_WARNING "Emergency Thaw complete\n");
+}
+
+/**
+ * emergency_thaw_all -- forcibly thaw every frozen filesystem
+ *
+ * Used for emergency unfreeze of all filesystems via SysRq
+ */
+void emergency_thaw_all(void)
+{
+	pdflush_operation(do_thaw_all, 0);
+}
+
 /**
  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
  * @mapping: the mapping which wants those buffers written
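
do_thaw_all() leans on thaw_bdev()'s return convention (visible in the removed hunk earlier in this diff): thaw_bdev() returns 0 while it still has freeze references to drop, so the while loop keeps calling it until the device reports it is fully thawed or was never frozen (-EINVAL). emergency_thaw_all() then hands the whole walk to pdflush, which matters because the expected trigger is a SysRq handler that cannot sleep. A hedged sketch of that consumer; the handler name and signature follow the conventions of drivers/char/sysrq.c in this era and are illustrative rather than quoted from the patch:

	/* Illustrative SysRq hook: runs in atomic context, so it only
	 * queues do_thaw_all() to pdflush via emergency_thaw_all(). */
	static void sysrq_handle_thaw(int key, struct tty_struct *tty)
	{
		emergency_thaw_all();
	}
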
@@ -760,32 +648,18 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  * If warn is true, then emit a warning if the page is not uptodate and has
  * not been truncated.
  */
-static int __set_page_dirty(struct page *page,
+static void __set_page_dirty(struct page *page,
 			struct address_space *mapping, int warn)
 {
-	if (unlikely(!mapping))
-		return !TestSetPageDirty(page);
-
-	if (TestSetPageDirty(page))
-		return 0;
-
 	spin_lock_irq(&mapping->tree_lock);
 	if (page->mapping) {	/* Race with truncate? */
 		WARN_ON_ONCE(warn && !PageUptodate(page));
-
-		if (mapping_cap_account_dirty(mapping)) {
-			__inc_zone_page_state(page, NR_FILE_DIRTY);
-			__inc_bdi_stat(mapping->backing_dev_info,
-					BDI_RECLAIMABLE);
-			task_io_account_write(PAGE_CACHE_SIZE);
-		}
+		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
 	spin_unlock_irq(&mapping->tree_lock);
 	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-
-	return 1;
 }
 
 /*
@@ -815,6 +689,7 @@ static int __set_page_dirty(struct page *page,
  */
 int __set_page_dirty_buffers(struct page *page)
 {
+	int newly_dirty;
 	struct address_space *mapping = page_mapping(page);
 
 	if (unlikely(!mapping))
@@ -830,9 +705,12 @@ int __set_page_dirty_buffers(struct page *page)
 			bh = bh->b_this_page;
 		} while (bh != head);
 	}
+	newly_dirty = !TestSetPageDirty(page);
 	spin_unlock(&mapping->private_lock);
 
-	return __set_page_dirty(page, mapping, 1);
+	if (newly_dirty)
+		__set_page_dirty(page, mapping, 1);
+	return newly_dirty;
 }
 EXPORT_SYMBOL(__set_page_dirty_buffers);
 
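
Two related simplifications meet in the last three hunks: the open-coded per-zone and per-bdi dirty accounting moves out of __set_page_dirty() into account_page_dirtied(), a helper introduced on the 'linus' side, and the TestSetPageDirty() race check moves up into the callers. That lets __set_page_dirty_buffers() decide dirtiness while still holding mapping->private_lock, then do the radix-tree and inode bookkeeping only in the thread that actually won the bit. The resulting caller pattern, condensed from the hunk above:

	/* Condensed caller pattern: test-and-set under private_lock,
	 * accounting afterwards, and only for the first dirtier. */
	newly_dirty = !TestSetPageDirty(page);
	spin_unlock(&mapping->private_lock);

	if (newly_dirty)
		__set_page_dirty(page, mapping, 1);
	return newly_dirty;
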
@@ -859,7 +737,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 {
 	struct buffer_head *bh;
 	struct list_head tmp;
-	struct address_space *mapping;
+	struct address_space *mapping, *prev_mapping = NULL;
 	int err = 0, err2;
 
 	INIT_LIST_HEAD(&tmp);
@@ -884,7 +762,18 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * contents - it is a noop if I/O is still in
 				 * flight on potentially older contents.
 				 */
-				ll_rw_block(SWRITE_SYNC, 1, &bh);
+				ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+
+				/*
+				 * Kick off IO for the previous mapping. Note
+				 * that we will not run the very last mapping,
+				 * wait_on_buffer() will do that for us
+				 * through sync_buffer().
+				 */
+				if (prev_mapping && prev_mapping != mapping)
+					blk_run_address_space(prev_mapping);
+				prev_mapping = mapping;
+
 				brelse(bh);
 				spin_lock(lock);
 			}
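
SWRITE_SYNC_PLUG appears to be a plugged variant of SWRITE_SYNC: each buffer is still submitted as a synchronous write, but the request queue is not unplugged per buffer. The new prev_mapping bookkeeping instead unplugs once per address_space, so a run of buffers against the same backing device is dispatched as one batch; per the added comment, the final mapping is kicked implicitly when wait_on_buffer() blocks. The batching idea in miniature (illustrative, not taken from the patch):

	/* Queue many plugged sync writes, then kick the device once. */
	for (i = 0; i < nr; i++)
		ll_rw_block(SWRITE_SYNC_PLUG, 1, &bhs[i]);	/* stays plugged */
	blk_run_address_space(mapping);				/* one unplug for the batch */
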
@@ -1261,8 +1150,11 @@ void mark_buffer_dirty(struct buffer_head *bh)
 			return;
 	}
 
-	if (!test_set_buffer_dirty(bh))
-		__set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
+	if (!test_set_buffer_dirty(bh)) {
+		struct page *page = bh->b_page;
+		if (!TestSetPageDirty(page))
+			__set_page_dirty(page, page_mapping(page), 0);
+	}
 }
 
 /*
@@ -1714,6 +1606,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	struct buffer_head *bh, *head;
 	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;
+	int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 
 	BUG_ON(!PageLocked(page));
 
@@ -1805,7 +1698,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
-			submit_bh(WRITE, bh);
+			submit_bh(write_op, bh);
 			nr_underway++;
 		}
 		bh = next;
@@ -1859,7 +1752,7 @@ recover:
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			clear_buffer_dirty(bh);
-			submit_bh(WRITE, bh);
+			submit_bh(write_op, bh);
 			nr_underway++;
 		}
 		bh = next;
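
The write_op plumbing in the three hunks above distinguishes integrity writeback from background writeback: when the writeback_control says WB_SYNC_ALL (fsync and friends), the page's buffers are submitted as WRITE_SYNC so the block layer and I/O scheduler can favour them, while ordinary background flushing stays plain WRITE. Condensed from the hunks above:

	/* Pick the request type once per page, then use it for every
	 * async buffer on that page, including the error-recovery path. */
	int write_op = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
	submit_bh(write_op, bh);
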
@@ -2465,13 +2358,14 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
  * unlock the page.
  */
 int
-block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 		   get_block_t get_block)
 {
+	struct page *page = vmf->page;
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	unsigned long end;
 	loff_t size;
-	int ret = -EINVAL;
+	int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
 
 	lock_page(page);
 	size = i_size_read(inode);
@@ -2491,6 +2385,13 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
 	if (!ret)
 		ret = block_commit_write(page, 0, end);
 
+	if (unlikely(ret)) {
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else /* -ENOSPC, -EIO, etc */
+			ret = VM_FAULT_SIGBUS;
+	}
+
 out_unlock:
 	unlock_page(page);
 	return ret;
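
block_page_mkwrite() now speaks the fault handler's language: it takes the struct vm_fault and returns VM_FAULT_* codes instead of 0/-errno, matching the ->page_mkwrite prototype change merged from the 'linus' side. -ENOMEM maps to VM_FAULT_OOM, any other failure (-ENOSPC, -EIO, ...) to VM_FAULT_SIGBUS, and the VM_FAULT_NOPAGE default makes the VM retry the fault. A hedged sketch of a filesystem wiring this up; the myfs_* names are placeholders, not from the patch:

	/* Illustrative only: hooking the new interface from a filesystem's
	 * vm_operations; myfs_get_block is that filesystem's get_block_t. */
	static int myfs_page_mkwrite(struct vm_area_struct *vma,
				     struct vm_fault *vmf)
	{
		return block_page_mkwrite(vma, vmf, myfs_get_block);
	}

	static const struct vm_operations_struct myfs_vm_ops = {
		.fault		= filemap_fault,
		.page_mkwrite	= myfs_page_mkwrite,
	};
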
@@ -3067,12 +2968,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 	for (i = 0; i < nr; i++) {
 		struct buffer_head *bh = bhs[i];
 
-		if (rw == SWRITE || rw == SWRITE_SYNC)
+		if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
 			lock_buffer(bh);
 		else if (!trylock_buffer(bh))
 			continue;
 
-		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
+		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
+		    rw == SWRITE_SYNC_PLUG) {
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
 				get_bh(bh);
@@ -3426,7 +3328,6 @@ EXPORT_SYMBOL(cont_write_begin);
 EXPORT_SYMBOL(end_buffer_read_sync);
 EXPORT_SYMBOL(end_buffer_write_sync);
 EXPORT_SYMBOL(file_fsync);
-EXPORT_SYMBOL(fsync_bdev);
 EXPORT_SYMBOL(generic_block_bmap);
 EXPORT_SYMBOL(generic_cont_expand_simple);
 EXPORT_SYMBOL(init_buffer);