author		Artem Bityutskiy <Artem.Bityutskiy@nokia.com>	2010-07-25 07:29:20 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2010-08-07 12:53:56 -0400
commit		fff5b85aa4225a7be157f208277a055822039a9e
tree		f4310bf188ef0a1dac52da39b61968aa566a717e
parent		adf392407076b85816d48714fb8eeaedb2157884
writeback: move bdi threads exiting logic to the forker thread
Currently, bdi threads can decide to exit if there were no useful activities
for 5 minutes. However, this causes nasty races: we can easily oops in
'bdi_queue_work()' if the bdi thread decides to exit while we are waking it
up. And even if we do not oops, but the bdi thread exits immediately after we
wake it up, we lose the wake-up event and suffer an unnecessary delay (up to
5 secs) in the bdi work processing.

This patch makes the forker thread the central place which not only creates
bdi threads, but also kills them if they have been inactive long enough. This
is better design-wise.

Another reason for this change is to prepare for further changes which will
prevent the bdi threads from waking up every 5 seconds and wasting power.
Indeed, once a task no longer wakes up periodically, it cannot decide to exit
on its own either.

This patch also moves the 'wake_up_bit()' call from the bdi thread to the
forker thread. So now the forker thread sets the BDI_pending bit, then forks
the task or kills it, then clears the bit and wakes up the waiting process.

The only process which may wait on the bit is 'bdi_wb_shutdown()'. This
function was changed as well - now it first removes the bdi from the
'bdi_list', then waits on the 'BDI_pending' bit. Once it wakes up, it is
guaranteed that the forker thread won't race with it, because the bdi is no
longer visible.

Note, the forker thread sets the 'BDI_pending' bit under the 'bdi->wb_lock',
which is essential for proper serialization. Additionally, when we change
'bdi->wb.task', we now take 'bdi->wb_lock' as well, to make sure we do not
lose wake-ups when racing with, say, 'bdi_queue_work()'.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r--	fs/fs-writeback.c	54
-rw-r--r--	mm/backing-dev.c	69
2 files changed, 70 insertions(+), 53 deletions(-)
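
Before the diffs, a condensed view of the handshake the commit message
describes. This is an illustrative sketch assembled from the hunks below, not
standalone code; all identifiers are the ones used in the patch:

	/* Forker side: owns BDI_pending for the whole fork/kill operation. */
	set_bit(BDI_pending, &bdi->state);
	/* ... fork a new bdi thread, or kthread_stop() an idle one ... */
	clear_bit(BDI_pending, &bdi->state);
	smp_mb__after_clear_bit();	/* order the clear before the wake-up */
	wake_up_bit(&bdi->state, BDI_pending);

	/* Shutdown side ('bdi_wb_shutdown()'): leave the list, then wait. */
	bdi_remove_from_list(bdi);	/* forker can no longer find this bdi */
	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
		    TASK_UNINTERRUPTIBLE);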
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9f5cab75c157..905f3ea38488 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -78,21 +78,17 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 
 	spin_lock(&bdi->wb_lock);
 	list_add_tail(&work->list, &bdi->work_list);
-	spin_unlock(&bdi->wb_lock);
-
-	/*
-	 * If the default thread isn't there, make sure we add it. When
-	 * it gets created and wakes up, we'll run this work.
-	 */
-	if (unlikely(!bdi->wb.task)) {
+	if (bdi->wb.task) {
+		wake_up_process(bdi->wb.task);
+	} else {
+		/*
+		 * The bdi thread isn't there, wake up the forker thread which
+		 * will create and run it.
+		 */
 		trace_writeback_nothread(bdi, work);
 		wake_up_process(default_backing_dev_info.wb.task);
-	} else {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		if (wb->task)
-			wake_up_process(wb->task);
 	}
+	spin_unlock(&bdi->wb_lock);
 }
 
 static void
@@ -800,7 +796,6 @@ int bdi_writeback_thread(void *data)
 {
 	struct bdi_writeback *wb = data;
 	struct backing_dev_info *bdi = wb->bdi;
-	unsigned long wait_jiffies = -1UL;
 	long pages_written;
 
 	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
@@ -812,13 +807,6 @@ int bdi_writeback_thread(void *data)
 	 */
	set_user_nice(current, 0);
 
-	/*
-	 * Clear pending bit and wakeup anybody waiting to tear us down
-	 */
-	clear_bit(BDI_pending, &bdi->state);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&bdi->state, BDI_pending);
-
 	trace_writeback_thread_start(bdi);
 
 	while (!kthread_should_stop()) {
@@ -828,18 +816,6 @@ int bdi_writeback_thread(void *data)
 
 		if (pages_written)
 			wb->last_active = jiffies;
-		else if (wait_jiffies != -1UL) {
-			unsigned long max_idle;
-
-			/*
-			 * Longest period of inactivity that we tolerate. If we
-			 * see dirty data again later, the thread will get
-			 * recreated automatically.
-			 */
-			max_idle = max(5UL * 60 * HZ, wait_jiffies);
-			if (time_after(jiffies, max_idle + wb->last_active))
-				break;
-		}
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (!list_empty(&bdi->work_list)) {
@@ -847,21 +823,15 @@ int bdi_writeback_thread(void *data)
 			continue;
 		}
 
-		if (dirty_writeback_interval) {
-			wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
-			schedule_timeout(wait_jiffies);
-		} else
+		if (dirty_writeback_interval)
+			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
+		else
 			schedule();
 
 		try_to_freeze();
 	}
 
-	wb->task = NULL;
-
-	/*
-	 * Flush any work that raced with us exiting. No new work
-	 * will be added, since this bdi isn't discoverable anymore.
-	 */
+	/* Flush any work that raced with us exiting */
 	if (!list_empty(&bdi->work_list))
 		wb_do_writeback(wb, 1);
 
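
The first hunk above is the oops fix called out in the commit message. An
illustrative timeline of the window the old code left open (assuming the bdi
thread has decided to exit, as the pre-patch 5-minute idle logic allowed):

	CPU0: bdi_queue_work() (old code)      CPU1: bdi thread
	---------------------------------      ------------------------------
	spin_lock(&bdi->wb_lock);
	list_add_tail(&work->list, ...);
	spin_unlock(&bdi->wb_lock);
	if (wb->task)                          idle too long: sets
	                                       wb->task = NULL and exits
		wake_up_process(wb->task);     /* task may already be gone:
	                                          oops, or wake-up lost */

After the patch, the list insertion, the 'bdi->wb.task' check and the wake-up
all happen under 'bdi->wb_lock', and the forker changes 'wb.task' only under
the same lock, so the thread cannot disappear in between.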
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e104e32c2ee8..9c1c199f88ce 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -316,6 +316,18 @@ static void sync_supers_timer_fn(unsigned long unused)
 	bdi_arm_supers_timer();
 }
 
+/*
+ * Calculate the longest interval (jiffies) bdi threads are allowed to be
+ * inactive.
+ */
+static unsigned long bdi_longest_inactive(void)
+{
+	unsigned long interval;
+
+	interval = msecs_to_jiffies(dirty_writeback_interval * 10);
+	return max(5UL * 60 * HZ, interval);
+}
+
 static int bdi_forker_thread(void *ptr)
 {
 	struct bdi_writeback *me = ptr;
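
A quick sanity check of 'bdi_longest_inactive()' with the default
'dirty_writeback_interval' of 500 centisecs: 500 * 10 = 5000 ms, i.e. 5
seconds' worth of jiffies, so 'max()' keeps the 5-minute (5UL * 60 * HZ)
floor. The tunable only extends the idle limit if an administrator raises
'dirty_writeback_interval' above 30000 centisecs, i.e. past 5 minutes.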
@@ -329,11 +341,12 @@ static int bdi_forker_thread(void *ptr)
 	set_user_nice(current, 0);
 
 	for (;;) {
-		struct task_struct *task;
+		struct task_struct *task = NULL;
 		struct backing_dev_info *bdi;
 		enum {
 			NO_ACTION,   /* Nothing to do */
 			FORK_THREAD, /* Fork bdi thread */
+			KILL_THREAD, /* Kill inactive bdi thread */
 		} action = NO_ACTION;
 
 		/*
@@ -346,10 +359,6 @@ static int bdi_forker_thread(void *ptr)
 		spin_lock_bh(&bdi_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		/*
-		 * Check if any existing bdi's have dirty data without
-		 * a thread registered. If so, set that up.
-		 */
 		list_for_each_entry(bdi, &bdi_list, bdi_list) {
 			bool have_dirty_io;
 
@@ -376,6 +385,25 @@ static int bdi_forker_thread(void *ptr)
 				action = FORK_THREAD;
 				break;
 			}
+
+			spin_lock(&bdi->wb_lock);
+			/*
+			 * If there is no work to do and the bdi thread was
+			 * inactive long enough - kill it. The wb_lock is taken
+			 * to make sure no-one adds more work to this bdi and
+			 * wakes the bdi thread up.
+			 */
+			if (bdi->wb.task && !have_dirty_io &&
+			    time_after(jiffies, bdi->wb.last_active +
+						bdi_longest_inactive())) {
+				task = bdi->wb.task;
+				bdi->wb.task = NULL;
+				spin_unlock(&bdi->wb_lock);
+				set_bit(BDI_pending, &bdi->state);
+				action = KILL_THREAD;
+				break;
+			}
+			spin_unlock(&bdi->wb_lock);
 		}
 		spin_unlock_bh(&bdi_lock);
 
@@ -394,8 +422,20 @@ static int bdi_forker_thread(void *ptr)
 				 * the bdi from the thread.
 				 */
 				bdi_flush_io(bdi);
-			} else
+			} else {
+				/*
+				 * The spinlock makes sure we do not lose
+				 * wake-ups when racing with 'bdi_queue_work()'.
+				 */
+				spin_lock(&bdi->wb_lock);
 				bdi->wb.task = task;
+				spin_unlock(&bdi->wb_lock);
+			}
+			break;
+
+		case KILL_THREAD:
+			__set_current_state(TASK_RUNNING);
+			kthread_stop(task);
 			break;
 
 		case NO_ACTION:
@@ -407,6 +447,13 @@ static int bdi_forker_thread(void *ptr)
 			/* Back to the main loop */
 			continue;
 		}
+
+		/*
+		 * Clear pending bit and wakeup anybody waiting to tear us down.
+		 */
+		clear_bit(BDI_pending, &bdi->state);
+		smp_mb__after_clear_bit();
+		wake_up_bit(&bdi->state, BDI_pending);
 	}
 
 	return 0;
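
One reasoning step the final hunk below leans on, spelled out (an illustrative
reading of the commit message, not text from the patch): with the old order,
'bdi_wb_shutdown()' waited on BDI_pending first and left 'bdi_list' second, so
the forker could begin a brand-new fork/kill cycle in the gap:

	old bdi_wb_shutdown()                  forker thread
	--------------------------------       ------------------------------
	wait_on_bit(..., BDI_pending, ...);    /* returns: bit is clear */
	                                       finds bdi still on bdi_list,
	                                       sets BDI_pending, forks/kills
	bdi_remove_from_list(bdi);             /* too late: race in flight */

Removing the bdi from the list first makes it invisible to the forker's
'list_for_each_entry()' scan, so once 'wait_on_bit()' returns there is
nothing left to race with.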
@@ -490,15 +537,15 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 		return;
 
 	/*
-	 * If setup is pending, wait for that to complete first
+	 * Make sure nobody finds us on the bdi_list anymore
 	 */
-	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
-			TASK_UNINTERRUPTIBLE);
+	bdi_remove_from_list(bdi);
 
 	/*
-	 * Make sure nobody finds us on the bdi_list anymore
+	 * If setup is pending, wait for that to complete first
 	 */
-	bdi_remove_from_list(bdi);
+	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
+			TASK_UNINTERRUPTIBLE);
 
 	/*
 	 * Finally, kill the kernel thread. We don't need to be RCU