aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2015-01-20 16:05:00 -0500
committerJens Axboe <axboe@fb.com>2015-01-20 16:05:00 -0500
commitc4db59d31e39ea067c32163ac961e9c80198fd37 (patch)
treef4a4d6127e70f98adad3f2abfd0738abbe72d7b5 /mm
parent7b14a213890a81473ec97ad7e322d5c2f19854ae (diff)
fs: don't reassign dirty inodes to default_backing_dev_info
If we have dirty inodes we need to call the filesystem for it, even if the device has been removed and the filesystem will error out early. The current code does that by reassigning all dirty inodes to the default backing_dev_info when a bdi is unlinked, but that's pretty pointless given that the bdi must always outlive the super block. Instead of stopping writeback at unregister time and moving inodes to the default bdi just keep the current bdi alive until it is destroyed. The containing objects of the bdi ensure this doesn't happen until all writeback has finished by erroring out. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Tejun Heo <tj@kernel.org> Reviewed-by: Jan Kara <jack@suse.cz> Killed the redundant WARN_ON(), as noticed by Jan. Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c90
1 file changed, 23 insertions, 67 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 52e0c7652448..1725adb242e0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -37,17 +37,6 @@ LIST_HEAD(bdi_list);
37/* bdi_wq serves all asynchronous writeback tasks */ 37/* bdi_wq serves all asynchronous writeback tasks */
38struct workqueue_struct *bdi_wq; 38struct workqueue_struct *bdi_wq;
39 39
40static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
41{
42 if (wb1 < wb2) {
43 spin_lock(&wb1->list_lock);
44 spin_lock_nested(&wb2->list_lock, 1);
45 } else {
46 spin_lock(&wb2->list_lock);
47 spin_lock_nested(&wb1->list_lock, 1);
48 }
49}
50
51#ifdef CONFIG_DEBUG_FS 40#ifdef CONFIG_DEBUG_FS
52#include <linux/debugfs.h> 41#include <linux/debugfs.h>
53#include <linux/seq_file.h> 42#include <linux/seq_file.h>
@@ -352,19 +341,19 @@ EXPORT_SYMBOL(bdi_register_dev);
352 */ 341 */
353static void bdi_wb_shutdown(struct backing_dev_info *bdi) 342static void bdi_wb_shutdown(struct backing_dev_info *bdi)
354{ 343{
355 if (!bdi_cap_writeback_dirty(bdi)) 344 /* Make sure nobody queues further work */
345 spin_lock_bh(&bdi->wb_lock);
346 if (!test_and_clear_bit(BDI_registered, &bdi->state)) {
347 spin_unlock_bh(&bdi->wb_lock);
356 return; 348 return;
349 }
350 spin_unlock_bh(&bdi->wb_lock);
357 351
358 /* 352 /*
359 * Make sure nobody finds us on the bdi_list anymore 353 * Make sure nobody finds us on the bdi_list anymore
360 */ 354 */
361 bdi_remove_from_list(bdi); 355 bdi_remove_from_list(bdi);
362 356
363 /* Make sure nobody queues further work */
364 spin_lock_bh(&bdi->wb_lock);
365 clear_bit(BDI_registered, &bdi->state);
366 spin_unlock_bh(&bdi->wb_lock);
367
368 /* 357 /*
369 * Drain work list and shutdown the delayed_work. At this point, 358 * Drain work list and shutdown the delayed_work. At this point,
370 * @bdi->bdi_list is empty telling bdi_Writeback_workfn() that @bdi 359 * @bdi->bdi_list is empty telling bdi_Writeback_workfn() that @bdi
@@ -372,37 +361,22 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
372 */ 361 */
373 mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); 362 mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
374 flush_delayed_work(&bdi->wb.dwork); 363 flush_delayed_work(&bdi->wb.dwork);
375 WARN_ON(!list_empty(&bdi->work_list));
376 WARN_ON(delayed_work_pending(&bdi->wb.dwork));
377} 364}
378 365
379/* 366/*
380 * This bdi is going away now, make sure that no super_blocks point to it 367 * Called when the device behind @bdi has been removed or ejected.
368 *
369 * We can't really do much here except for reducing the dirty ratio at
370 * the moment. In the future we should be able to set a flag so that
371 * the filesystem can handle errors at mark_inode_dirty time instead
372 * of only at writeback time.
381 */ 373 */
382static void bdi_prune_sb(struct backing_dev_info *bdi)
383{
384 struct super_block *sb;
385
386 spin_lock(&sb_lock);
387 list_for_each_entry(sb, &super_blocks, s_list) {
388 if (sb->s_bdi == bdi)
389 sb->s_bdi = &default_backing_dev_info;
390 }
391 spin_unlock(&sb_lock);
392}
393
394void bdi_unregister(struct backing_dev_info *bdi) 374void bdi_unregister(struct backing_dev_info *bdi)
395{ 375{
396 if (bdi->dev) { 376 if (WARN_ON_ONCE(!bdi->dev))
397 bdi_set_min_ratio(bdi, 0); 377 return;
398 trace_writeback_bdi_unregister(bdi);
399 bdi_prune_sb(bdi);
400 378
401 bdi_wb_shutdown(bdi); 379 bdi_set_min_ratio(bdi, 0);
402 bdi_debug_unregister(bdi);
403 device_unregister(bdi->dev);
404 bdi->dev = NULL;
405 }
406} 380}
407EXPORT_SYMBOL(bdi_unregister); 381EXPORT_SYMBOL(bdi_unregister);
408 382
@@ -471,37 +445,19 @@ void bdi_destroy(struct backing_dev_info *bdi)
471{ 445{
472 int i; 446 int i;
473 447
474 /* 448 bdi_wb_shutdown(bdi);
475 * Splice our entries to the default_backing_dev_info. This
476 * condition shouldn't happen. @wb must be empty at this point and
477 * dirty inodes on it might cause other issues. This workaround is
478 * added by ce5f8e779519 ("writeback: splice dirty inode entries to
479 * default bdi on bdi_destroy()") without root-causing the issue.
480 *
481 * http://lkml.kernel.org/g/1253038617-30204-11-git-send-email-jens.axboe@oracle.com
482 * http://thread.gmane.org/gmane.linux.file-systems/35341/focus=35350
483 *
484 * We should probably add WARN_ON() to find out whether it still
485 * happens and track it down if so.
486 */
487 if (bdi_has_dirty_io(bdi)) {
488 struct bdi_writeback *dst = &default_backing_dev_info.wb;
489
490 bdi_lock_two(&bdi->wb, dst);
491 list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
492 list_splice(&bdi->wb.b_io, &dst->b_io);
493 list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
494 spin_unlock(&bdi->wb.list_lock);
495 spin_unlock(&dst->list_lock);
496 }
497
498 bdi_unregister(bdi);
499 449
450 WARN_ON(!list_empty(&bdi->work_list));
500 WARN_ON(delayed_work_pending(&bdi->wb.dwork)); 451 WARN_ON(delayed_work_pending(&bdi->wb.dwork));
501 452
453 if (bdi->dev) {
454 bdi_debug_unregister(bdi);
455 device_unregister(bdi->dev);
456 bdi->dev = NULL;
457 }
458
502 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) 459 for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
503 percpu_counter_destroy(&bdi->bdi_stat[i]); 460 percpu_counter_destroy(&bdi->bdi_stat[i]);
504
505 fprop_local_destroy_percpu(&bdi->completions); 461 fprop_local_destroy_percpu(&bdi->completions);
506} 462}
507EXPORT_SYMBOL(bdi_destroy); 463EXPORT_SYMBOL(bdi_destroy);