diff options
Diffstat (limited to 'fs/fs-writeback.c')
| -rw-r--r-- | fs/fs-writeback.c | 40 |
1 files changed, 38 insertions, 2 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index b40168fcc94a..36855c1f8daf 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -331,11 +331,22 @@ struct inode_switch_wbs_context { | |||
| 331 | struct work_struct work; | 331 | struct work_struct work; |
| 332 | }; | 332 | }; |
| 333 | 333 | ||
| 334 | static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) | ||
| 335 | { | ||
| 336 | down_write(&bdi->wb_switch_rwsem); | ||
| 337 | } | ||
| 338 | |||
| 339 | static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) | ||
| 340 | { | ||
| 341 | up_write(&bdi->wb_switch_rwsem); | ||
| 342 | } | ||
| 343 | |||
| 334 | static void inode_switch_wbs_work_fn(struct work_struct *work) | 344 | static void inode_switch_wbs_work_fn(struct work_struct *work) |
| 335 | { | 345 | { |
| 336 | struct inode_switch_wbs_context *isw = | 346 | struct inode_switch_wbs_context *isw = |
| 337 | container_of(work, struct inode_switch_wbs_context, work); | 347 | container_of(work, struct inode_switch_wbs_context, work); |
| 338 | struct inode *inode = isw->inode; | 348 | struct inode *inode = isw->inode; |
| 349 | struct backing_dev_info *bdi = inode_to_bdi(inode); | ||
| 339 | struct address_space *mapping = inode->i_mapping; | 350 | struct address_space *mapping = inode->i_mapping; |
| 340 | struct bdi_writeback *old_wb = inode->i_wb; | 351 | struct bdi_writeback *old_wb = inode->i_wb; |
| 341 | struct bdi_writeback *new_wb = isw->new_wb; | 352 | struct bdi_writeback *new_wb = isw->new_wb; |
| @@ -344,6 +355,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) | |||
| 344 | bool switched = false; | 355 | bool switched = false; |
| 345 | 356 | ||
| 346 | /* | 357 | /* |
| 358 | * If @inode switches cgwb membership while sync_inodes_sb() is | ||
| 359 | * being issued, sync_inodes_sb() might miss it. Synchronize. | ||
| 360 | */ | ||
| 361 | down_read(&bdi->wb_switch_rwsem); | ||
| 362 | |||
| 363 | /* | ||
| 347 | * By the time control reaches here, RCU grace period has passed | 364 | * By the time control reaches here, RCU grace period has passed |
| 348 | * since I_WB_SWITCH assertion and all wb stat update transactions | 365 | * since I_WB_SWITCH assertion and all wb stat update transactions |
| 349 | * between unlocked_inode_to_wb_begin/end() are guaranteed to be | 366 | * between unlocked_inode_to_wb_begin/end() are guaranteed to be |
| @@ -428,6 +445,8 @@ skip_switch: | |||
| 428 | spin_unlock(&new_wb->list_lock); | 445 | spin_unlock(&new_wb->list_lock); |
| 429 | spin_unlock(&old_wb->list_lock); | 446 | spin_unlock(&old_wb->list_lock); |
| 430 | 447 | ||
| 448 | up_read(&bdi->wb_switch_rwsem); | ||
| 449 | |||
| 431 | if (switched) { | 450 | if (switched) { |
| 432 | wb_wakeup(new_wb); | 451 | wb_wakeup(new_wb); |
| 433 | wb_put(old_wb); | 452 | wb_put(old_wb); |
| @@ -468,9 +487,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) | |||
| 468 | if (inode->i_state & I_WB_SWITCH) | 487 | if (inode->i_state & I_WB_SWITCH) |
| 469 | return; | 488 | return; |
| 470 | 489 | ||
| 490 | /* | ||
| 491 | * Avoid starting new switches while sync_inodes_sb() is in | ||
| 492 | * progress. Otherwise, if the down_write protected issue path | ||
| 493 | * blocks heavily, we might end up starting a large number of | ||
| 494 | * switches which will block on the rwsem. | ||
| 495 | */ | ||
| 496 | if (!down_read_trylock(&bdi->wb_switch_rwsem)) | ||
| 497 | return; | ||
| 498 | |||
| 471 | isw = kzalloc(sizeof(*isw), GFP_ATOMIC); | 499 | isw = kzalloc(sizeof(*isw), GFP_ATOMIC); |
| 472 | if (!isw) | 500 | if (!isw) |
| 473 | return; | 501 | goto out_unlock; |
| 474 | 502 | ||
| 475 | /* find and pin the new wb */ | 503 | /* find and pin the new wb */ |
| 476 | rcu_read_lock(); | 504 | rcu_read_lock(); |
| @@ -504,12 +532,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) | |||
| 504 | * Let's continue after I_WB_SWITCH is guaranteed to be visible. | 532 | * Let's continue after I_WB_SWITCH is guaranteed to be visible. |
| 505 | */ | 533 | */ |
| 506 | call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); | 534 | call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); |
| 507 | return; | 535 | goto out_unlock; |
| 508 | 536 | ||
| 509 | out_free: | 537 | out_free: |
| 510 | if (isw->new_wb) | 538 | if (isw->new_wb) |
| 511 | wb_put(isw->new_wb); | 539 | wb_put(isw->new_wb); |
| 512 | kfree(isw); | 540 | kfree(isw); |
| 541 | out_unlock: | ||
| 542 | up_read(&bdi->wb_switch_rwsem); | ||
| 513 | } | 543 | } |
| 514 | 544 | ||
| 515 | /** | 545 | /** |
| @@ -887,6 +917,9 @@ fs_initcall(cgroup_writeback_init); | |||
| 887 | 917 | ||
| 888 | #else /* CONFIG_CGROUP_WRITEBACK */ | 918 | #else /* CONFIG_CGROUP_WRITEBACK */ |
| 889 | 919 | ||
| 920 | static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } | ||
| 921 | static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } | ||
| 922 | |||
| 890 | static struct bdi_writeback * | 923 | static struct bdi_writeback * |
| 891 | locked_inode_to_wb_and_lock_list(struct inode *inode) | 924 | locked_inode_to_wb_and_lock_list(struct inode *inode) |
| 892 | __releases(&inode->i_lock) | 925 | __releases(&inode->i_lock) |
| @@ -2413,8 +2446,11 @@ void sync_inodes_sb(struct super_block *sb) | |||
| 2413 | return; | 2446 | return; |
| 2414 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 2447 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
| 2415 | 2448 | ||
| 2449 | /* protect against inode wb switch, see inode_switch_wbs_work_fn() */ | ||
| 2450 | bdi_down_write_wb_switch_rwsem(bdi); | ||
| 2416 | bdi_split_work_to_wbs(bdi, &work, false); | 2451 | bdi_split_work_to_wbs(bdi, &work, false); |
| 2417 | wb_wait_for_completion(bdi, &done); | 2452 | wb_wait_for_completion(bdi, &done); |
| 2453 | bdi_up_write_wb_switch_rwsem(bdi); | ||
| 2418 | 2454 | ||
| 2419 | wait_sb_inodes(sb); | 2455 | wait_sb_inodes(sb); |
| 2420 | } | 2456 | } |
