Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--   fs/fs-writeback.c   465
1 file changed, 156 insertions(+), 309 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1d1088f48bc2..d5be1693ac93 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,51 +38,18 @@ int nr_pdflush_threads;
| 38 | /* | 38 | /* |
| 39 | * Passed into wb_writeback(), essentially a subset of writeback_control | 39 | * Passed into wb_writeback(), essentially a subset of writeback_control |
| 40 | */ | 40 | */ |
| 41 | struct wb_writeback_args { | 41 | struct wb_writeback_work { |
| 42 | long nr_pages; | 42 | long nr_pages; |
| 43 | struct super_block *sb; | 43 | struct super_block *sb; |
| 44 | enum writeback_sync_modes sync_mode; | 44 | enum writeback_sync_modes sync_mode; |
| 45 | unsigned int for_kupdate:1; | 45 | unsigned int for_kupdate:1; |
| 46 | unsigned int range_cyclic:1; | 46 | unsigned int range_cyclic:1; |
| 47 | unsigned int for_background:1; | 47 | unsigned int for_background:1; |
| 48 | }; | ||
| 49 | 48 | ||
| 50 | /* | ||
| 51 | * Work items for the bdi_writeback threads | ||
| 52 | */ | ||
| 53 | struct bdi_work { | ||
| 54 | struct list_head list; /* pending work list */ | 49 | struct list_head list; /* pending work list */ |
| 55 | struct rcu_head rcu_head; /* for RCU free/clear of work */ | 50 | struct completion *done; /* set if the caller waits */ |
| 56 | |||
| 57 | unsigned long seen; /* threads that have seen this work */ | ||
| 58 | atomic_t pending; /* number of threads still to do work */ | ||
| 59 | |||
| 60 | struct wb_writeback_args args; /* writeback arguments */ | ||
| 61 | |||
| 62 | unsigned long state; /* flag bits, see WS_* */ | ||
| 63 | }; | 51 | }; |
| 64 | 52 | ||
| 65 | enum { | ||
| 66 | WS_USED_B = 0, | ||
| 67 | WS_ONSTACK_B, | ||
| 68 | }; | ||
| 69 | |||
| 70 | #define WS_USED (1 << WS_USED_B) | ||
| 71 | #define WS_ONSTACK (1 << WS_ONSTACK_B) | ||
| 72 | |||
| 73 | static inline bool bdi_work_on_stack(struct bdi_work *work) | ||
| 74 | { | ||
| 75 | return test_bit(WS_ONSTACK_B, &work->state); | ||
| 76 | } | ||
| 77 | |||
| 78 | static inline void bdi_work_init(struct bdi_work *work, | ||
| 79 | struct wb_writeback_args *args) | ||
| 80 | { | ||
| 81 | INIT_RCU_HEAD(&work->rcu_head); | ||
| 82 | work->args = *args; | ||
| 83 | work->state = WS_USED; | ||
| 84 | } | ||
| 85 | |||
| 86 | /** | 53 | /** |
| 87 | * writeback_in_progress - determine whether there is writeback in progress | 54 | * writeback_in_progress - determine whether there is writeback in progress |
| 88 | * @bdi: the device's backing_dev_info structure. | 55 | * @bdi: the device's backing_dev_info structure. |
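Read off the new-side column of the hunk above, the patch folds the old pair of structures (a wb_writeback_args embedded in a reference-counted bdi_work) into a single work item. A sketch of the consolidated type, assuming the usual kernel headers (<linux/list.h>, <linux/completion.h>, <linux/writeback.h>):

        /* Consolidated work item as it reads after this patch. */
        struct wb_writeback_work {
                long nr_pages;                          /* budget of pages to write */
                struct super_block *sb;                 /* write only this sb, or NULL */
                enum writeback_sync_modes sync_mode;
                unsigned int for_kupdate:1;
                unsigned int range_cyclic:1;
                unsigned int for_background:1;

                struct list_head list;                  /* pending work list */
                struct completion *done;                /* set if the caller waits */
        };

The list head replaces the RCU-managed bdi_work list, and the completion pointer replaces the WS_USED/WS_ONSTACK bit-wait protocol for callers that need to block until the work is finished.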
@@ -95,76 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
| 95 | return !list_empty(&bdi->work_list); | 62 | return !list_empty(&bdi->work_list); |
| 96 | } | 63 | } |
| 97 | 64 | ||
| 98 | static void bdi_work_clear(struct bdi_work *work) | 65 | static void bdi_queue_work(struct backing_dev_info *bdi, |
| 99 | { | 66 | struct wb_writeback_work *work) |
| 100 | clear_bit(WS_USED_B, &work->state); | ||
| 101 | smp_mb__after_clear_bit(); | ||
| 102 | /* | ||
| 103 | * work can have disappeared at this point. bit waitq functions | ||
| 104 | * should be able to tolerate this, provided bdi_sched_wait does | ||
| 105 | * not dereference it's pointer argument. | ||
| 106 | */ | ||
| 107 | wake_up_bit(&work->state, WS_USED_B); | ||
| 108 | } | ||
| 109 | |||
| 110 | static void bdi_work_free(struct rcu_head *head) | ||
| 111 | { | ||
| 112 | struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); | ||
| 113 | |||
| 114 | if (!bdi_work_on_stack(work)) | ||
| 115 | kfree(work); | ||
| 116 | else | ||
| 117 | bdi_work_clear(work); | ||
| 118 | } | ||
| 119 | |||
| 120 | static void wb_work_complete(struct bdi_work *work) | ||
| 121 | { | ||
| 122 | const enum writeback_sync_modes sync_mode = work->args.sync_mode; | ||
| 123 | int onstack = bdi_work_on_stack(work); | ||
| 124 | |||
| 125 | /* | ||
| 126 | * For allocated work, we can clear the done/seen bit right here. | ||
| 127 | * For on-stack work, we need to postpone both the clear and free | ||
| 128 | * to after the RCU grace period, since the stack could be invalidated | ||
| 129 | * as soon as bdi_work_clear() has done the wakeup. | ||
| 130 | */ | ||
| 131 | if (!onstack) | ||
| 132 | bdi_work_clear(work); | ||
| 133 | if (sync_mode == WB_SYNC_NONE || onstack) | ||
| 134 | call_rcu(&work->rcu_head, bdi_work_free); | ||
| 135 | } | ||
| 136 | |||
| 137 | static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) | ||
| 138 | { | ||
| 139 | /* | ||
| 140 | * The caller has retrieved the work arguments from this work, | ||
| 141 | * drop our reference. If this is the last ref, delete and free it | ||
| 142 | */ | ||
| 143 | if (atomic_dec_and_test(&work->pending)) { | ||
| 144 | struct backing_dev_info *bdi = wb->bdi; | ||
| 145 | |||
| 146 | spin_lock(&bdi->wb_lock); | ||
| 147 | list_del_rcu(&work->list); | ||
| 148 | spin_unlock(&bdi->wb_lock); | ||
| 149 | |||
| 150 | wb_work_complete(work); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) | ||
| 155 | { | 67 | { |
| 156 | work->seen = bdi->wb_mask; | ||
| 157 | BUG_ON(!work->seen); | ||
| 158 | atomic_set(&work->pending, bdi->wb_cnt); | ||
| 159 | BUG_ON(!bdi->wb_cnt); | ||
| 160 | |||
| 161 | /* | ||
| 162 | * list_add_tail_rcu() contains the necessary barriers to | ||
| 163 | * make sure the above stores are seen before the item is | ||
| 164 | * noticed on the list | ||
| 165 | */ | ||
| 166 | spin_lock(&bdi->wb_lock); | 68 | spin_lock(&bdi->wb_lock); |
| 167 | list_add_tail_rcu(&work->list, &bdi->work_list); | 69 | list_add_tail(&work->list, &bdi->work_list); |
| 168 | spin_unlock(&bdi->wb_lock); | 70 | spin_unlock(&bdi->wb_lock); |
| 169 | 71 | ||
| 170 | /* | 72 | /* |
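With a single work type on a plain list, queueing becomes a locked list_add_tail; the per-thread seen mask, the pending refcount and the RCU insertion all go away. A minimal sketch of the post-patch queueing path, reconstructed from the new-side column (the wake-up of the flusher thread is in the unchanged tail of the function, outside this hunk):

        static void bdi_queue_work(struct backing_dev_info *bdi,
                                   struct wb_writeback_work *work)
        {
                spin_lock(&bdi->wb_lock);
                list_add_tail(&work->list, &bdi->work_list);    /* FIFO: oldest work first */
                spin_unlock(&bdi->wb_lock);

                /*
                 * The unchanged remainder of the function (not shown in this
                 * hunk) wakes bdi->wb.task, or the default bdi thread if the
                 * per-bdi flusher has not been forked yet.
                 */
        }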
@@ -181,97 +83,59 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
| 181 | } | 83 | } |
| 182 | } | 84 | } |
| 183 | 85 | ||
| 184 | /* | 86 | static void |
| 185 | * Used for on-stack allocated work items. The caller needs to wait until | 87 | __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, |
| 186 | * the wb threads have acked the work before it's safe to continue. | 88 | bool range_cyclic, bool for_background) |
| 187 | */ | ||
| 188 | static void bdi_wait_on_work_clear(struct bdi_work *work) | ||
| 189 | { | ||
| 190 | wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait, | ||
| 191 | TASK_UNINTERRUPTIBLE); | ||
| 192 | } | ||
| 193 | |||
| 194 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | ||
| 195 | struct wb_writeback_args *args) | ||
| 196 | { | 89 | { |
| 197 | struct bdi_work *work; | 90 | struct wb_writeback_work *work; |
| 198 | 91 | ||
| 199 | /* | 92 | /* |
| 200 | * This is WB_SYNC_NONE writeback, so if allocation fails just | 93 | * This is WB_SYNC_NONE writeback, so if allocation fails just |
| 201 | * wakeup the thread for old dirty data writeback | 94 | * wakeup the thread for old dirty data writeback |
| 202 | */ | 95 | */ |
| 203 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | 96 | work = kzalloc(sizeof(*work), GFP_ATOMIC); |
| 204 | if (work) { | 97 | if (!work) { |
| 205 | bdi_work_init(work, args); | 98 | if (bdi->wb.task) |
| 206 | bdi_queue_work(bdi, work); | 99 | wake_up_process(bdi->wb.task); |
| 207 | } else { | 100 | return; |
| 208 | struct bdi_writeback *wb = &bdi->wb; | ||
| 209 | |||
| 210 | if (wb->task) | ||
| 211 | wake_up_process(wb->task); | ||
| 212 | } | 101 | } |
| 102 | |||
| 103 | work->sync_mode = WB_SYNC_NONE; | ||
| 104 | work->nr_pages = nr_pages; | ||
| 105 | work->range_cyclic = range_cyclic; | ||
| 106 | work->for_background = for_background; | ||
| 107 | |||
| 108 | bdi_queue_work(bdi, work); | ||
| 213 | } | 109 | } |
| 214 | 110 | ||
| 215 | /** | 111 | /** |
| 216 | * bdi_sync_writeback - start and wait for writeback | 112 | * bdi_start_writeback - start writeback |
| 217 | * @bdi: the backing device to write from | 113 | * @bdi: the backing device to write from |
| 218 | * @sb: write inodes from this super_block | 114 | * @nr_pages: the number of pages to write |
| 219 | * | 115 | * |
| 220 | * Description: | 116 | * Description: |
| 221 | * This does WB_SYNC_ALL data integrity writeback and waits for the | 117 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only |
| 222 | * IO to complete. Callers must hold the sb s_umount semaphore for | 118 | * started when this function returns, we make no guarentees on |
| 223 | * reading, to avoid having the super disappear before we are done. | 119 | * completion. Caller need not hold sb s_umount semaphore. |
| 120 | * | ||
| 224 | */ | 121 | */ |
| 225 | static void bdi_sync_writeback(struct backing_dev_info *bdi, | 122 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) |
| 226 | struct super_block *sb) | ||
| 227 | { | 123 | { |
| 228 | struct wb_writeback_args args = { | 124 | __bdi_start_writeback(bdi, nr_pages, true, false); |
| 229 | .sb = sb, | ||
| 230 | .sync_mode = WB_SYNC_ALL, | ||
| 231 | .nr_pages = LONG_MAX, | ||
| 232 | .range_cyclic = 0, | ||
| 233 | }; | ||
| 234 | struct bdi_work work; | ||
| 235 | |||
| 236 | bdi_work_init(&work, &args); | ||
| 237 | work.state |= WS_ONSTACK; | ||
| 238 | |||
| 239 | bdi_queue_work(bdi, &work); | ||
| 240 | bdi_wait_on_work_clear(&work); | ||
| 241 | } | 125 | } |
| 242 | 126 | ||
| 243 | /** | 127 | /** |
| 244 | * bdi_start_writeback - start writeback | 128 | * bdi_start_background_writeback - start background writeback |
| 245 | * @bdi: the backing device to write from | 129 | * @bdi: the backing device to write from |
| 246 | * @sb: write inodes from this super_block | ||
| 247 | * @nr_pages: the number of pages to write | ||
| 248 | * | 130 | * |
| 249 | * Description: | 131 | * Description: |
| 250 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only | 132 | * This does WB_SYNC_NONE background writeback. The IO is only |
| 251 | * started when this function returns, we make no guarentees on | 133 | * started when this function returns, we make no guarentees on |
| 252 | * completion. Caller need not hold sb s_umount semaphore. | 134 | * completion. Caller need not hold sb s_umount semaphore. |
| 253 | * | ||
| 254 | */ | 135 | */ |
| 255 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | 136 | void bdi_start_background_writeback(struct backing_dev_info *bdi) |
| 256 | long nr_pages) | ||
| 257 | { | 137 | { |
| 258 | struct wb_writeback_args args = { | 138 | __bdi_start_writeback(bdi, LONG_MAX, true, true); |
| 259 | .sb = sb, | ||
| 260 | .sync_mode = WB_SYNC_NONE, | ||
| 261 | .nr_pages = nr_pages, | ||
| 262 | .range_cyclic = 1, | ||
| 263 | }; | ||
| 264 | |||
| 265 | /* | ||
| 266 | * We treat @nr_pages=0 as the special case to do background writeback, | ||
| 267 | * ie. to sync pages until the background dirty threshold is reached. | ||
| 268 | */ | ||
| 269 | if (!nr_pages) { | ||
| 270 | args.nr_pages = LONG_MAX; | ||
| 271 | args.for_background = 1; | ||
| 272 | } | ||
| 273 | |||
| 274 | bdi_alloc_queue_work(bdi, &args); | ||
| 275 | } | 139 | } |
| 276 | 140 | ||
| 277 | /* | 141 | /* |
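Asynchronous (WB_SYNC_NONE) requests are now heap-allocated instead of going through bdi_alloc_queue_work(). Consolidated from the new-side column, the helper reads roughly as below; GFP_ATOMIC keeps it usable from non-blocking contexts, and an allocation failure degrades to simply waking the flusher thread so old dirty data still gets written:

        static void
        __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
                              bool range_cyclic, bool for_background)
        {
                struct wb_writeback_work *work;

                /*
                 * This is WB_SYNC_NONE writeback, so if allocation fails just
                 * wake up the thread for old dirty data writeback.
                 */
                work = kzalloc(sizeof(*work), GFP_ATOMIC);
                if (!work) {
                        if (bdi->wb.task)
                                wake_up_process(bdi->wb.task);
                        return;
                }

                work->sync_mode      = WB_SYNC_NONE;
                work->nr_pages       = nr_pages;
                work->range_cyclic   = range_cyclic;
                work->for_background = for_background;

                bdi_queue_work(bdi, work);
        }

bdi_start_writeback() and bdi_start_background_writeback() become thin wrappers around this helper, passing (nr_pages, true, false) and (LONG_MAX, true, true) respectively; the old nr_pages == 0 special case is replaced by the explicit background entry point.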
@@ -561,75 +425,69 @@ select_queue:
| 561 | return ret; | 425 | return ret; |
| 562 | } | 426 | } |
| 563 | 427 | ||
| 564 | static void unpin_sb_for_writeback(struct super_block *sb) | ||
| 565 | { | ||
| 566 | up_read(&sb->s_umount); | ||
| 567 | put_super(sb); | ||
| 568 | } | ||
| 569 | |||
| 570 | enum sb_pin_state { | ||
| 571 | SB_PINNED, | ||
| 572 | SB_NOT_PINNED, | ||
| 573 | SB_PIN_FAILED | ||
| 574 | }; | ||
| 575 | |||
| 576 | /* | 428 | /* |
| 577 | * For WB_SYNC_NONE writeback, the caller does not have the sb pinned | 429 | * For background writeback the caller does not have the sb pinned |
| 578 | * before calling writeback. So make sure that we do pin it, so it doesn't | 430 | * before calling writeback. So make sure that we do pin it, so it doesn't |
| 579 | * go away while we are writing inodes from it. | 431 | * go away while we are writing inodes from it. |
| 580 | */ | 432 | */ |
| 581 | static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, | 433 | static bool pin_sb_for_writeback(struct super_block *sb) |
| 582 | struct super_block *sb) | ||
| 583 | { | 434 | { |
| 584 | /* | ||
| 585 | * Caller must already hold the ref for this | ||
| 586 | */ | ||
| 587 | if (wbc->sync_mode == WB_SYNC_ALL) { | ||
| 588 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
| 589 | return SB_NOT_PINNED; | ||
| 590 | } | ||
| 591 | spin_lock(&sb_lock); | 435 | spin_lock(&sb_lock); |
| 436 | if (list_empty(&sb->s_instances)) { | ||
| 437 | spin_unlock(&sb_lock); | ||
| 438 | return false; | ||
| 439 | } | ||
| 440 | |||
| 592 | sb->s_count++; | 441 | sb->s_count++; |
| 442 | spin_unlock(&sb_lock); | ||
| 443 | |||
| 593 | if (down_read_trylock(&sb->s_umount)) { | 444 | if (down_read_trylock(&sb->s_umount)) { |
| 594 | if (sb->s_root) { | 445 | if (sb->s_root) |
| 595 | spin_unlock(&sb_lock); | 446 | return true; |
| 596 | return SB_PINNED; | ||
| 597 | } | ||
| 598 | /* | ||
| 599 | * umounted, drop rwsem again and fall through to failure | ||
| 600 | */ | ||
| 601 | up_read(&sb->s_umount); | 447 | up_read(&sb->s_umount); |
| 602 | } | 448 | } |
| 603 | sb->s_count--; | 449 | |
| 604 | spin_unlock(&sb_lock); | 450 | put_super(sb); |
| 605 | return SB_PIN_FAILED; | 451 | return false; |
| 606 | } | 452 | } |
| 607 | 453 | ||
| 608 | /* | 454 | /* |
| 609 | * Write a portion of b_io inodes which belong to @sb. | 455 | * Write a portion of b_io inodes which belong to @sb. |
| 610 | * If @wbc->sb != NULL, then find and write all such | 456 | * |
| 457 | * If @only_this_sb is true, then find and write all such | ||
| 611 | * inodes. Otherwise write only ones which go sequentially | 458 | * inodes. Otherwise write only ones which go sequentially |
| 612 | * in reverse order. | 459 | * in reverse order. |
| 460 | * | ||
| 613 | * Return 1, if the caller writeback routine should be | 461 | * Return 1, if the caller writeback routine should be |
| 614 | * interrupted. Otherwise return 0. | 462 | * interrupted. Otherwise return 0. |
| 615 | */ | 463 | */ |
| 616 | static int writeback_sb_inodes(struct super_block *sb, | 464 | static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, |
| 617 | struct bdi_writeback *wb, | 465 | struct writeback_control *wbc, bool only_this_sb) |
| 618 | struct writeback_control *wbc) | ||
| 619 | { | 466 | { |
| 620 | while (!list_empty(&wb->b_io)) { | 467 | while (!list_empty(&wb->b_io)) { |
| 621 | long pages_skipped; | 468 | long pages_skipped; |
| 622 | struct inode *inode = list_entry(wb->b_io.prev, | 469 | struct inode *inode = list_entry(wb->b_io.prev, |
| 623 | struct inode, i_list); | 470 | struct inode, i_list); |
| 624 | if (wbc->sb && sb != inode->i_sb) { | 471 | |
| 625 | /* super block given and doesn't | 472 | if (inode->i_sb != sb) { |
| 626 | match, skip this inode */ | 473 | if (only_this_sb) { |
| 627 | redirty_tail(inode); | 474 | /* |
| 628 | continue; | 475 | * We only want to write back data for this |
| 629 | } | 476 | * superblock, move all inodes not belonging |
| 630 | if (sb != inode->i_sb) | 477 | * to it back onto the dirty list. |
| 631 | /* finish with this superblock */ | 478 | */ |
| 479 | redirty_tail(inode); | ||
| 480 | continue; | ||
| 481 | } | ||
| 482 | |||
| 483 | /* | ||
| 484 | * The inode belongs to a different superblock. | ||
| 485 | * Bounce back to the caller to unpin this and | ||
| 486 | * pin the next superblock. | ||
| 487 | */ | ||
| 632 | return 0; | 488 | return 0; |
| 489 | } | ||
| 490 | |||
| 633 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { | 491 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { |
| 634 | requeue_io(inode); | 492 | requeue_io(inode); |
| 635 | continue; | 493 | continue; |
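The three-state sb_pin_state enum and unpin_sb_for_writeback() disappear; pinning a superblock is now a boolean operation, and the caller releases the pin with the existing VFS helper drop_super(), which pairs up_read() of s_umount with put_super(). Reconstructed from the new-side column:

        static bool pin_sb_for_writeback(struct super_block *sb)
        {
                spin_lock(&sb_lock);
                if (list_empty(&sb->s_instances)) {
                        spin_unlock(&sb_lock);
                        return false;           /* superblock is already being killed */
                }

                sb->s_count++;                  /* passive reference */
                spin_unlock(&sb_lock);

                if (down_read_trylock(&sb->s_umount)) {
                        if (sb->s_root)
                                return true;    /* pinned: s_umount held for read */
                        up_read(&sb->s_umount); /* umounted underneath us */
                }

                put_super(sb);
                return false;
        }

writeback_sb_inodes() gains an only_this_sb flag in place of the old wbc->sb check: when writing on behalf of a specific superblock, foreign inodes are redirtied; otherwise hitting a foreign inode returns to the caller so it can pin the next superblock.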
@@ -667,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb,
| 667 | return 1; | 525 | return 1; |
| 668 | } | 526 | } |
| 669 | 527 | ||
| 670 | static void writeback_inodes_wb(struct bdi_writeback *wb, | 528 | void writeback_inodes_wb(struct bdi_writeback *wb, |
| 671 | struct writeback_control *wbc) | 529 | struct writeback_control *wbc) |
| 672 | { | 530 | { |
| 673 | int ret = 0; | 531 | int ret = 0; |
| 674 | 532 | ||
@@ -681,24 +539,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
| 681 | struct inode *inode = list_entry(wb->b_io.prev, | 539 | struct inode *inode = list_entry(wb->b_io.prev, |
| 682 | struct inode, i_list); | 540 | struct inode, i_list); |
| 683 | struct super_block *sb = inode->i_sb; | 541 | struct super_block *sb = inode->i_sb; |
| 684 | enum sb_pin_state state; | ||
| 685 | 542 | ||
| 686 | if (wbc->sb && sb != wbc->sb) { | 543 | if (!pin_sb_for_writeback(sb)) { |
| 687 | /* super block given and doesn't | ||
| 688 | match, skip this inode */ | ||
| 689 | redirty_tail(inode); | ||
| 690 | continue; | ||
| 691 | } | ||
| 692 | state = pin_sb_for_writeback(wbc, sb); | ||
| 693 | |||
| 694 | if (state == SB_PIN_FAILED) { | ||
| 695 | requeue_io(inode); | 544 | requeue_io(inode); |
| 696 | continue; | 545 | continue; |
| 697 | } | 546 | } |
| 698 | ret = writeback_sb_inodes(sb, wb, wbc); | 547 | ret = writeback_sb_inodes(sb, wb, wbc, false); |
| 548 | drop_super(sb); | ||
| 699 | 549 | ||
| 700 | if (state == SB_PINNED) | ||
| 701 | unpin_sb_for_writeback(sb); | ||
| 702 | if (ret) | 550 | if (ret) |
| 703 | break; | 551 | break; |
| 704 | } | 552 | } |
@@ -706,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
| 706 | /* Leave any unwritten inodes on b_io */ | 554 | /* Leave any unwritten inodes on b_io */ |
| 707 | } | 555 | } |
| 708 | 556 | ||
| 709 | void writeback_inodes_wbc(struct writeback_control *wbc) | 557 | static void __writeback_inodes_sb(struct super_block *sb, |
| 558 | struct bdi_writeback *wb, struct writeback_control *wbc) | ||
| 710 | { | 559 | { |
| 711 | struct backing_dev_info *bdi = wbc->bdi; | 560 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
| 712 | 561 | ||
| 713 | writeback_inodes_wb(&bdi->wb, wbc); | 562 | wbc->wb_start = jiffies; /* livelock avoidance */ |
| 563 | spin_lock(&inode_lock); | ||
| 564 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | ||
| 565 | queue_io(wb, wbc->older_than_this); | ||
| 566 | writeback_sb_inodes(sb, wb, wbc, true); | ||
| 567 | spin_unlock(&inode_lock); | ||
| 714 | } | 568 | } |
| 715 | 569 | ||
| 716 | /* | 570 | /* |
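The exported writeback_inodes_wbc() is gone; per-superblock writeback now enters through this static helper, which wb_writeback() calls whenever work->sb is set (see the dispatch in the wb_writeback() hunks further down). Consolidated from the new-side column:

        static void __writeback_inodes_sb(struct super_block *sb,
                        struct bdi_writeback *wb, struct writeback_control *wbc)
        {
                WARN_ON(!rwsem_is_locked(&sb->s_umount));  /* caller holds s_umount */

                wbc->wb_start = jiffies;                   /* livelock avoidance */
                spin_lock(&inode_lock);
                if (!wbc->for_kupdate || list_empty(&wb->b_io))
                        queue_io(wb, wbc->older_than_this);
                writeback_sb_inodes(sb, wb, wbc, true);    /* only_this_sb = true */
                spin_unlock(&inode_lock);
        }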
@@ -748,16 +602,14 @@ static inline bool over_bground_thresh(void)
| 748 | * all dirty pages if they are all attached to "old" mappings. | 602 | * all dirty pages if they are all attached to "old" mappings. |
| 749 | */ | 603 | */ |
| 750 | static long wb_writeback(struct bdi_writeback *wb, | 604 | static long wb_writeback(struct bdi_writeback *wb, |
| 751 | struct wb_writeback_args *args) | 605 | struct wb_writeback_work *work) |
| 752 | { | 606 | { |
| 753 | struct writeback_control wbc = { | 607 | struct writeback_control wbc = { |
| 754 | .bdi = wb->bdi, | 608 | .sync_mode = work->sync_mode, |
| 755 | .sb = args->sb, | ||
| 756 | .sync_mode = args->sync_mode, | ||
| 757 | .older_than_this = NULL, | 609 | .older_than_this = NULL, |
| 758 | .for_kupdate = args->for_kupdate, | 610 | .for_kupdate = work->for_kupdate, |
| 759 | .for_background = args->for_background, | 611 | .for_background = work->for_background, |
| 760 | .range_cyclic = args->range_cyclic, | 612 | .range_cyclic = work->range_cyclic, |
| 761 | }; | 613 | }; |
| 762 | unsigned long oldest_jif; | 614 | unsigned long oldest_jif; |
| 763 | long wrote = 0; | 615 | long wrote = 0; |
@@ -777,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
| 777 | /* | 629 | /* |
| 778 | * Stop writeback when nr_pages has been consumed | 630 | * Stop writeback when nr_pages has been consumed |
| 779 | */ | 631 | */ |
| 780 | if (args->nr_pages <= 0) | 632 | if (work->nr_pages <= 0) |
| 781 | break; | 633 | break; |
| 782 | 634 | ||
| 783 | /* | 635 | /* |
| 784 | * For background writeout, stop when we are below the | 636 | * For background writeout, stop when we are below the |
| 785 | * background dirty threshold | 637 | * background dirty threshold |
| 786 | */ | 638 | */ |
| 787 | if (args->for_background && !over_bground_thresh()) | 639 | if (work->for_background && !over_bground_thresh()) |
| 788 | break; | 640 | break; |
| 789 | 641 | ||
| 790 | wbc.more_io = 0; | 642 | wbc.more_io = 0; |
| 791 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | 643 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; |
| 792 | wbc.pages_skipped = 0; | 644 | wbc.pages_skipped = 0; |
| 793 | writeback_inodes_wb(wb, &wbc); | 645 | if (work->sb) |
| 794 | args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 646 | __writeback_inodes_sb(work->sb, wb, &wbc); |
| 647 | else | ||
| 648 | writeback_inodes_wb(wb, &wbc); | ||
| 649 | work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | ||
| 795 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 650 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
| 796 | 651 | ||
| 797 | /* | 652 | /* |
@@ -827,31 +682,21 @@
| 827 | } | 682 | } |
| 828 | 683 | ||
| 829 | /* | 684 | /* |
| 830 | * Return the next bdi_work struct that hasn't been processed by this | 685 | * Return the next wb_writeback_work struct that hasn't been processed yet. |
| 831 | * wb thread yet. ->seen is initially set for each thread that exists | ||
| 832 | * for this device, when a thread first notices a piece of work it | ||
| 833 | * clears its bit. Depending on writeback type, the thread will notify | ||
| 834 | * completion on either receiving the work (WB_SYNC_NONE) or after | ||
| 835 | * it is done (WB_SYNC_ALL). | ||
| 836 | */ | 686 | */ |
| 837 | static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, | 687 | static struct wb_writeback_work * |
| 838 | struct bdi_writeback *wb) | 688 | get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) |
| 839 | { | 689 | { |
| 840 | struct bdi_work *work, *ret = NULL; | 690 | struct wb_writeback_work *work = NULL; |
| 841 | 691 | ||
| 842 | rcu_read_lock(); | 692 | spin_lock(&bdi->wb_lock); |
| 843 | 693 | if (!list_empty(&bdi->work_list)) { | |
| 844 | list_for_each_entry_rcu(work, &bdi->work_list, list) { | 694 | work = list_entry(bdi->work_list.next, |
| 845 | if (!test_bit(wb->nr, &work->seen)) | 695 | struct wb_writeback_work, list); |
| 846 | continue; | 696 | list_del_init(&work->list); |
| 847 | clear_bit(wb->nr, &work->seen); | ||
| 848 | |||
| 849 | ret = work; | ||
| 850 | break; | ||
| 851 | } | 697 | } |
| 852 | 698 | spin_unlock(&bdi->wb_lock); | |
| 853 | rcu_read_unlock(); | 699 | return work; |
| 854 | return ret; | ||
| 855 | } | 700 | } |
| 856 | 701 | ||
| 857 | static long wb_check_old_data_flush(struct bdi_writeback *wb) | 702 | static long wb_check_old_data_flush(struct bdi_writeback *wb) |
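Fetching work is likewise reduced from an RCU list walk with a per-thread seen bitmask to a plain FIFO dequeue under wb_lock. Reconstructed from the new-side column:

        static struct wb_writeback_work *
        get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
        {
                struct wb_writeback_work *work = NULL;

                spin_lock(&bdi->wb_lock);
                if (!list_empty(&bdi->work_list)) {
                        work = list_entry(bdi->work_list.next,
                                          struct wb_writeback_work, list);
                        list_del_init(&work->list);     /* item now owned by this thread */
                }
                spin_unlock(&bdi->wb_lock);
                return work;
        }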
@@ -876,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
| 876 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 721 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
| 877 | 722 | ||
| 878 | if (nr_pages) { | 723 | if (nr_pages) { |
| 879 | struct wb_writeback_args args = { | 724 | struct wb_writeback_work work = { |
| 880 | .nr_pages = nr_pages, | 725 | .nr_pages = nr_pages, |
| 881 | .sync_mode = WB_SYNC_NONE, | 726 | .sync_mode = WB_SYNC_NONE, |
| 882 | .for_kupdate = 1, | 727 | .for_kupdate = 1, |
| 883 | .range_cyclic = 1, | 728 | .range_cyclic = 1, |
| 884 | }; | 729 | }; |
| 885 | 730 | ||
| 886 | return wb_writeback(wb, &args); | 731 | return wb_writeback(wb, &work); |
| 887 | } | 732 | } |
| 888 | 733 | ||
| 889 | return 0; | 734 | return 0; |
@@ -895,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
| 895 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | 740 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) |
| 896 | { | 741 | { |
| 897 | struct backing_dev_info *bdi = wb->bdi; | 742 | struct backing_dev_info *bdi = wb->bdi; |
| 898 | struct bdi_work *work; | 743 | struct wb_writeback_work *work; |
| 899 | long wrote = 0; | 744 | long wrote = 0; |
| 900 | 745 | ||
| 901 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 746 | while ((work = get_next_work_item(bdi, wb)) != NULL) { |
| 902 | struct wb_writeback_args args = work->args; | ||
| 903 | |||
| 904 | /* | 747 | /* |
| 905 | * Override sync mode, in case we must wait for completion | 748 | * Override sync mode, in case we must wait for completion |
| 749 | * because this thread is exiting now. | ||
| 906 | */ | 750 | */ |
| 907 | if (force_wait) | 751 | if (force_wait) |
| 908 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; | 752 | work->sync_mode = WB_SYNC_ALL; |
| 909 | |||
| 910 | /* | ||
| 911 | * If this isn't a data integrity operation, just notify | ||
| 912 | * that we have seen this work and we are now starting it. | ||
| 913 | */ | ||
| 914 | if (args.sync_mode == WB_SYNC_NONE) | ||
| 915 | wb_clear_pending(wb, work); | ||
| 916 | 753 | ||
| 917 | wrote += wb_writeback(wb, &args); | 754 | wrote += wb_writeback(wb, work); |
| 918 | 755 | ||
| 919 | /* | 756 | /* |
| 920 | * This is a data integrity writeback, so only do the | 757 | * Notify the caller of completion if this is a synchronous |
| 921 | * notification when we have completed the work. | 758 | * work item, otherwise just free it. |
| 922 | */ | 759 | */ |
| 923 | if (args.sync_mode == WB_SYNC_ALL) | 760 | if (work->done) |
| 924 | wb_clear_pending(wb, work); | 761 | complete(work->done); |
| 762 | else | ||
| 763 | kfree(work); | ||
| 925 | } | 764 | } |
| 926 | 765 | ||
| 927 | /* | 766 | /* |
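Ownership of a work item is now decided by a single field: if done is set, the submitter is blocked in wait_for_completion() and owns the (typically on-stack) item, so the flusher only signals it; if done is NULL the item was kzalloc'ed by __bdi_start_writeback() and the flusher frees it. The core of the dequeue loop, as it reads on the new side of this hunk:

        while ((work = get_next_work_item(bdi, wb)) != NULL) {
                /*
                 * Override sync mode, in case we must wait for completion
                 * because this thread is exiting now.
                 */
                if (force_wait)
                        work->sync_mode = WB_SYNC_ALL;

                wrote += wb_writeback(wb, work);

                /*
                 * Notify the caller of completion if this is a synchronous
                 * work item, otherwise just free it.
                 */
                if (work->done)
                        complete(work->done);
                else
                        kfree(work);
        }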
@@ -978,42 +817,27 @@ int bdi_writeback_task(struct bdi_writeback *wb)
| 978 | } | 817 | } |
| 979 | 818 | ||
| 980 | /* | 819 | /* |
| 981 | * Schedule writeback for all backing devices. This does WB_SYNC_NONE | 820 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back |
| 982 | * writeback, for integrity writeback see bdi_sync_writeback(). | 821 | * the whole world. |
| 983 | */ | 822 | */ |
| 984 | static void bdi_writeback_all(struct super_block *sb, long nr_pages) | 823 | void wakeup_flusher_threads(long nr_pages) |
| 985 | { | 824 | { |
| 986 | struct wb_writeback_args args = { | ||
| 987 | .sb = sb, | ||
| 988 | .nr_pages = nr_pages, | ||
| 989 | .sync_mode = WB_SYNC_NONE, | ||
| 990 | }; | ||
| 991 | struct backing_dev_info *bdi; | 825 | struct backing_dev_info *bdi; |
| 992 | 826 | ||
| 993 | rcu_read_lock(); | 827 | if (!nr_pages) { |
| 828 | nr_pages = global_page_state(NR_FILE_DIRTY) + | ||
| 829 | global_page_state(NR_UNSTABLE_NFS); | ||
| 830 | } | ||
| 994 | 831 | ||
| 832 | rcu_read_lock(); | ||
| 995 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { | 833 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { |
| 996 | if (!bdi_has_dirty_io(bdi)) | 834 | if (!bdi_has_dirty_io(bdi)) |
| 997 | continue; | 835 | continue; |
| 998 | 836 | __bdi_start_writeback(bdi, nr_pages, false, false); | |
| 999 | bdi_alloc_queue_work(bdi, &args); | ||
| 1000 | } | 837 | } |
| 1001 | |||
| 1002 | rcu_read_unlock(); | 838 | rcu_read_unlock(); |
| 1003 | } | 839 | } |
| 1004 | 840 | ||
| 1005 | /* | ||
| 1006 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back | ||
| 1007 | * the whole world. | ||
| 1008 | */ | ||
| 1009 | void wakeup_flusher_threads(long nr_pages) | ||
| 1010 | { | ||
| 1011 | if (nr_pages == 0) | ||
| 1012 | nr_pages = global_page_state(NR_FILE_DIRTY) + | ||
| 1013 | global_page_state(NR_UNSTABLE_NFS); | ||
| 1014 | bdi_writeback_all(NULL, nr_pages); | ||
| 1015 | } | ||
| 1016 | |||
| 1017 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) | 841 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) |
| 1018 | { | 842 | { |
| 1019 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { | 843 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { |
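bdi_writeback_all() is folded into wakeup_flusher_threads(): the walk over bdi_list stays under rcu_read_lock(), but each backing device now gets its own dynamically allocated, non-waiting work item, so the sb pointer and completion stay NULL. Consolidated from the new-side column:

        void wakeup_flusher_threads(long nr_pages)
        {
                struct backing_dev_info *bdi;

                if (!nr_pages) {
                        nr_pages = global_page_state(NR_FILE_DIRTY) +
                                   global_page_state(NR_UNSTABLE_NFS);
                }

                rcu_read_lock();
                list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
                        if (!bdi_has_dirty_io(bdi))
                                continue;
                        /* range_cyclic = false, for_background = false */
                        __bdi_start_writeback(bdi, nr_pages, false, false);
                }
                rcu_read_unlock();
        }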
@@ -1218,12 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb)
| 1218 | { | 1042 | { |
| 1219 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 1043 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); |
| 1220 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | 1044 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); |
| 1221 | long nr_to_write; | 1045 | DECLARE_COMPLETION_ONSTACK(done); |
| 1046 | struct wb_writeback_work work = { | ||
| 1047 | .sb = sb, | ||
| 1048 | .sync_mode = WB_SYNC_NONE, | ||
| 1049 | .done = &done, | ||
| 1050 | }; | ||
| 1051 | |||
| 1052 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
| 1222 | 1053 | ||
| 1223 | nr_to_write = nr_dirty + nr_unstable + | 1054 | work.nr_pages = nr_dirty + nr_unstable + |
| 1224 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 1055 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
| 1225 | 1056 | ||
| 1226 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write); | 1057 | bdi_queue_work(sb->s_bdi, &work); |
| 1058 | wait_for_completion(&done); | ||
| 1227 | } | 1059 | } |
| 1228 | EXPORT_SYMBOL(writeback_inodes_sb); | 1060 | EXPORT_SYMBOL(writeback_inodes_sb); |
| 1229 | 1061 | ||
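Waiting callers now use an ordinary on-stack work item paired with an on-stack completion. Because the flusher calls complete(work->done) and never touches the item again, and completions are safe for on-stack use (that is what DECLARE_COMPLETION_ONSTACK exists for), the old WS_ONSTACK flag and RCU-deferred freeing are no longer needed. The caller-side pattern, consolidated from the new-side column of the hunk above; sync_inodes_sb() in the last hunk uses the same shape with WB_SYNC_ALL and nr_pages = LONG_MAX:

        void writeback_inodes_sb(struct super_block *sb)
        {
                unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
                unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
                DECLARE_COMPLETION_ONSTACK(done);
                struct wb_writeback_work work = {
                        .sb        = sb,
                        .sync_mode = WB_SYNC_NONE,
                        .done      = &done,
                };

                WARN_ON(!rwsem_is_locked(&sb->s_umount));   /* caller holds s_umount */

                work.nr_pages = nr_dirty + nr_unstable +
                                (inodes_stat.nr_inodes - inodes_stat.nr_unused);

                bdi_queue_work(sb->s_bdi, &work);
                wait_for_completion(&done);                 /* flusher signals when done */
        }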
@@ -1237,7 +1069,9 @@ EXPORT_SYMBOL(writeback_inodes_sb);
| 1237 | int writeback_inodes_sb_if_idle(struct super_block *sb) | 1069 | int writeback_inodes_sb_if_idle(struct super_block *sb) |
| 1238 | { | 1070 | { |
| 1239 | if (!writeback_in_progress(sb->s_bdi)) { | 1071 | if (!writeback_in_progress(sb->s_bdi)) { |
| 1072 | down_read(&sb->s_umount); | ||
| 1240 | writeback_inodes_sb(sb); | 1073 | writeback_inodes_sb(sb); |
| 1074 | up_read(&sb->s_umount); | ||
| 1241 | return 1; | 1075 | return 1; |
| 1242 | } else | 1076 | } else |
| 1243 | return 0; | 1077 | return 0; |
@@ -1253,7 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
| 1253 | */ | 1087 | */ |
| 1254 | void sync_inodes_sb(struct super_block *sb) | 1088 | void sync_inodes_sb(struct super_block *sb) |
| 1255 | { | 1089 | { |
| 1256 | bdi_sync_writeback(sb->s_bdi, sb); | 1090 | DECLARE_COMPLETION_ONSTACK(done); |
| 1091 | struct wb_writeback_work work = { | ||
| 1092 | .sb = sb, | ||
| 1093 | .sync_mode = WB_SYNC_ALL, | ||
| 1094 | .nr_pages = LONG_MAX, | ||
| 1095 | .range_cyclic = 0, | ||
| 1096 | .done = &done, | ||
| 1097 | }; | ||
| 1098 | |||
| 1099 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
| 1100 | |||
| 1101 | bdi_queue_work(sb->s_bdi, &work); | ||
| 1102 | wait_for_completion(&done); | ||
| 1103 | |||
| 1257 | wait_sb_inodes(sb); | 1104 | wait_sb_inodes(sb); |
| 1258 | } | 1105 | } |
| 1259 | EXPORT_SYMBOL(sync_inodes_sb); | 1106 | EXPORT_SYMBOL(sync_inodes_sb); |
