author		Artem Bityutskiy <Artem.Bityutskiy@nokia.com>	2010-07-25 07:29:17 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2010-08-07 12:53:56 -0400
commit		78c40cb6581a74adc48821f3de6b864a54d4c34d
tree		9ef19e68f6c28ded34f42de8a21b139006bfceaa /mm/backing-dev.c
parent		080dcec41709be72613133f695be75b98dd43e88
writeback: do not remove bdi from bdi_list
The forker thread removes bdis from 'bdi_list' before forking the bdi thread. But this is wrong for at least two reasons.

Reason #1: if we temporarily remove a bdi from the list, we may miss works which would otherwise be given to us.

Reason #2: this is racy; indeed, 'bdi_wb_shutdown()' expects that bdis are always in the 'bdi_list' (see 'bdi_remove_from_list()'), and when it races with the forker thread, it can shut down the bdi thread at the same time as the forker creates it.

This patch makes sure the forker thread never removes bdis from 'bdi_list' (which was suggested by Christoph Hellwig). In order to make sure that we do not race with 'bdi_wb_shutdown()', we have to hold the 'bdi_lock' while walking the 'bdi_list' and setting the 'BDI_pending' flag.

NOTE! The error path is interesting. Currently, when we fail to create a bdi thread, we move the bdi to the tail of 'bdi_list'. But if we never remove the bdi from the list, we cannot move it to the tail either, because then we could confuse the RCU readers which walk the list, and we would also reintroduce the race described above in "Reason #2". But I do not think that adding to the tail matters much, so I simply do not do it.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
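The locking protocol described above can be summarised in a short sketch. The fragment below is not the kernel code itself: it reuses the identifiers visible in the patch ('bdi_lock', 'bdi_list', 'BDI_pending', 'bdi->state'), condenses the forker's eligibility checks, and replaces the wait_on_bit() call that 'bdi_wb_shutdown()' actually uses with a plain polling loop, purely to keep the illustration self-contained.

	/* Forker thread: claim a bdi without removing it from bdi_list. */
	spin_lock_bh(&bdi_lock);
	list_for_each_entry(bdi, &bdi_list, bdi_list) {
		if (!bdi_cap_writeback_dirty(bdi) || bdi->wb.task)
			continue;
		/*
		 * Mark the bdi as pending while still holding bdi_lock, so a
		 * concurrent bdi_wb_shutdown() sees the flag and cannot tear
		 * the bdi down underneath us.
		 */
		set_bit(BDI_pending, &bdi->state);
		fork = true;
		break;
	}
	spin_unlock_bh(&bdi_lock);

	/*
	 * Unregister side (bdi_wb_shutdown()): wait until the forker has
	 * finished with the bdi.  The real code waits with wait_on_bit();
	 * the polling loop below only stands in for it in this sketch.
	 */
	while (test_bit(BDI_pending, &bdi->state))
		schedule_timeout_uninterruptible(1);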
Diffstat (limited to 'mm/backing-dev.c')
-rw-r--r--	mm/backing-dev.c	31
1 file changed, 10 insertions(+), 21 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index dbc66815a0fe..672c17bb32db 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -331,7 +331,7 @@ static int bdi_forker_thread(void *ptr)
 	for (;;) {
 		bool fork = false;
 		struct task_struct *task;
-		struct backing_dev_info *bdi, *tmp;
+		struct backing_dev_info *bdi;
 
 		/*
 		 * Temporary measure, we want to make sure we don't see
@@ -347,7 +347,7 @@ static int bdi_forker_thread(void *ptr)
 		 * Check if any existing bdi's have dirty data without
 		 * a thread registered. If so, set that up.
 		 */
-		list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) {
+		list_for_each_entry(bdi, &bdi_list, bdi_list) {
 			if (!bdi_cap_writeback_dirty(bdi))
 				continue;
 			if (bdi->wb.task)
@@ -359,8 +359,13 @@ static int bdi_forker_thread(void *ptr)
 			WARN(!test_bit(BDI_registered, &bdi->state),
 			     "bdi %p/%s is not registered!\n", bdi, bdi->name);
 
-			list_del_rcu(&bdi->bdi_list);
 			fork = true;
+
+			/*
+			 * Set the pending bit - if someone will try to
+			 * unregister this bdi - it'll wait on this bit.
+			 */
+			set_bit(BDI_pending, &bdi->state);
 			break;
 		}
 		spin_unlock_bh(&bdi_lock);
@@ -383,29 +388,13 @@ static int bdi_forker_thread(void *ptr)
 
 		__set_current_state(TASK_RUNNING);
 
-		/*
-		 * Set the pending bit - if someone will try to unregister this
-		 * bdi - it'll wait on this bit.
-		 */
-		set_bit(BDI_pending, &bdi->state);
-
-		/* Make sure no one uses the picked bdi */
-		synchronize_rcu();
-
 		task = kthread_run(bdi_writeback_thread, &bdi->wb, "flush-%s",
 				   dev_name(bdi->dev));
 		if (IS_ERR(task)) {
 			/*
-			 * If thread creation fails, then readd the bdi back to
-			 * the list and force writeout of the bdi from this
-			 * forker thread. That will free some memory and we can
-			 * try again. Add it to the tail so we get a chance to
-			 * flush other bdi's to free memory.
+			 * If thread creation fails, force writeout of the bdi
+			 * from the thread.
 			 */
-			spin_lock_bh(&bdi_lock);
-			list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
-			spin_unlock_bh(&bdi_lock);
-
 			bdi_flush_io(bdi);
 		} else
 			bdi->wb.task = task;