diff options
author | Artem Bityutskiy <Artem.Bityutskiy@nokia.com> | 2010-07-25 07:29:17 -0400 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-08-07 12:53:56 -0400 |
commit | 78c40cb6581a74adc48821f3de6b864a54d4c34d (patch) | |
tree | 9ef19e68f6c28ded34f42de8a21b139006bfceaa /mm/backing-dev.c | |
parent | 080dcec41709be72613133f695be75b98dd43e88 (diff) |
writeback: do not remove bdi from bdi_list
The forker thread removes bdis from 'bdi_list' before forking the bdi thread.
But this is wrong for at least 2 reasons.
Reason #1: if we temporary remove a bdi from the list, we may miss works which
would otherwise be given to us.
Reason #2: this is racy; indeed, 'bdi_wb_shutdown()' expects that bdis are
always in the 'bdi_list' (see 'bdi_remove_from_list()'), and when
it races with the forker thread, it can shut down the bdi thread
at the same time as the forker creates it.
This patch makes sure the forker thread never removes bdis from 'bdi_list'
(which was suggested by Christoph Hellwig).
In order to make sure that we do not race with 'bdi_wb_shutdown()', we have to
hold the 'bdi_lock' while walking the 'bdi_list' and setting the 'BDI_pending'
flag.
NOTE! The error path is interesting. Currently, when we fail to create a bdi
thread, we move the bdi to the tail of 'bdi_list'. But if we never remove the
bdi from the list, we cannot move it to the tail either, because then we can
mess up the RCU readers which walk the list. And also, we'll have the race
described above in "Reason #2".
But I not think that adding to the tail is any important so I just do not do
that.
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'mm/backing-dev.c')
-rw-r--r-- | mm/backing-dev.c | 31 |
1 files changed, 10 insertions, 21 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index dbc66815a0fe..672c17bb32db 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -331,7 +331,7 @@ static int bdi_forker_thread(void *ptr) | |||
331 | for (;;) { | 331 | for (;;) { |
332 | bool fork = false; | 332 | bool fork = false; |
333 | struct task_struct *task; | 333 | struct task_struct *task; |
334 | struct backing_dev_info *bdi, *tmp; | 334 | struct backing_dev_info *bdi; |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * Temporary measure, we want to make sure we don't see | 337 | * Temporary measure, we want to make sure we don't see |
@@ -347,7 +347,7 @@ static int bdi_forker_thread(void *ptr) | |||
347 | * Check if any existing bdi's have dirty data without | 347 | * Check if any existing bdi's have dirty data without |
348 | * a thread registered. If so, set that up. | 348 | * a thread registered. If so, set that up. |
349 | */ | 349 | */ |
350 | list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) { | 350 | list_for_each_entry(bdi, &bdi_list, bdi_list) { |
351 | if (!bdi_cap_writeback_dirty(bdi)) | 351 | if (!bdi_cap_writeback_dirty(bdi)) |
352 | continue; | 352 | continue; |
353 | if (bdi->wb.task) | 353 | if (bdi->wb.task) |
@@ -359,8 +359,13 @@ static int bdi_forker_thread(void *ptr) | |||
359 | WARN(!test_bit(BDI_registered, &bdi->state), | 359 | WARN(!test_bit(BDI_registered, &bdi->state), |
360 | "bdi %p/%s is not registered!\n", bdi, bdi->name); | 360 | "bdi %p/%s is not registered!\n", bdi, bdi->name); |
361 | 361 | ||
362 | list_del_rcu(&bdi->bdi_list); | ||
363 | fork = true; | 362 | fork = true; |
363 | |||
364 | /* | ||
365 | * Set the pending bit - if someone will try to | ||
366 | * unregister this bdi - it'll wait on this bit. | ||
367 | */ | ||
368 | set_bit(BDI_pending, &bdi->state); | ||
364 | break; | 369 | break; |
365 | } | 370 | } |
366 | spin_unlock_bh(&bdi_lock); | 371 | spin_unlock_bh(&bdi_lock); |
@@ -383,29 +388,13 @@ static int bdi_forker_thread(void *ptr) | |||
383 | 388 | ||
384 | __set_current_state(TASK_RUNNING); | 389 | __set_current_state(TASK_RUNNING); |
385 | 390 | ||
386 | /* | ||
387 | * Set the pending bit - if someone will try to unregister this | ||
388 | * bdi - it'll wait on this bit. | ||
389 | */ | ||
390 | set_bit(BDI_pending, &bdi->state); | ||
391 | |||
392 | /* Make sure no one uses the picked bdi */ | ||
393 | synchronize_rcu(); | ||
394 | |||
395 | task = kthread_run(bdi_writeback_thread, &bdi->wb, "flush-%s", | 391 | task = kthread_run(bdi_writeback_thread, &bdi->wb, "flush-%s", |
396 | dev_name(bdi->dev)); | 392 | dev_name(bdi->dev)); |
397 | if (IS_ERR(task)) { | 393 | if (IS_ERR(task)) { |
398 | /* | 394 | /* |
399 | * If thread creation fails, then readd the bdi back to | 395 | * If thread creation fails, force writeout of the bdi |
400 | * the list and force writeout of the bdi from this | 396 | * from the thread. |
401 | * forker thread. That will free some memory and we can | ||
402 | * try again. Add it to the tail so we get a chance to | ||
403 | * flush other bdi's to free memory. | ||
404 | */ | 397 | */ |
405 | spin_lock_bh(&bdi_lock); | ||
406 | list_add_tail_rcu(&bdi->bdi_list, &bdi_list); | ||
407 | spin_unlock_bh(&bdi_lock); | ||
408 | |||
409 | bdi_flush_io(bdi); | 398 | bdi_flush_io(bdi); |
410 | } else | 399 | } else |
411 | bdi->wb.task = task; | 400 | bdi->wb.task = task; |