diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2009-09-14 07:12:40 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-09-16 09:18:51 -0400 |
commit | cfc4ba5365449cb6b5c9f68d755a142f17da1e47 (patch) | |
tree | 08770de9bb0e658f2e65abd4d10187b3e9f6bb1b | |
parent | f11fcae8401a3175f528e2f7917362645d570111 (diff) |
writeback: use RCU to protect bdi_list
Now that bdi_writeback_all() no longer handles integrity writeback,
it doesn't have to block anymore. This means that we can switch
bdi_list reader-side protection to RCU.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | fs/fs-writeback.c | 6 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 1 | ||||
-rw-r--r-- | mm/backing-dev.c | 76 | ||||
-rw-r--r-- | mm/page-writeback.c | 8 |
4 files changed, 63 insertions, 28 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 14f06b459197..f8cd7a97f5b7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -868,16 +868,16 @@ static void bdi_writeback_all(struct writeback_control *wbc) | |||
868 | 868 | ||
869 | WARN_ON(wbc->sync_mode == WB_SYNC_ALL); | 869 | WARN_ON(wbc->sync_mode == WB_SYNC_ALL); |
870 | 870 | ||
871 | spin_lock(&bdi_lock); | 871 | rcu_read_lock(); |
872 | 872 | ||
873 | list_for_each_entry(bdi, &bdi_list, bdi_list) { | 873 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { |
874 | if (!bdi_has_dirty_io(bdi)) | 874 | if (!bdi_has_dirty_io(bdi)) |
875 | continue; | 875 | continue; |
876 | 876 | ||
877 | bdi_alloc_queue_work(bdi, wbc); | 877 | bdi_alloc_queue_work(bdi, wbc); |
878 | } | 878 | } |
879 | 879 | ||
880 | spin_unlock(&bdi_lock); | 880 | rcu_read_unlock(); |
881 | } | 881 | } |
882 | 882 | ||
883 | /* | 883 | /* |
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f169bcb90b58..859e797f4576 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -59,6 +59,7 @@ struct bdi_writeback { | |||
59 | 59 | ||
60 | struct backing_dev_info { | 60 | struct backing_dev_info { |
61 | struct list_head bdi_list; | 61 | struct list_head bdi_list; |
62 | struct rcu_head rcu_head; | ||
62 | unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ | 63 | unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ |
63 | unsigned long state; /* Always use atomic bitops on this */ | 64 | unsigned long state; /* Always use atomic bitops on this */ |
64 | unsigned int capabilities; /* Device capabilities */ | 65 | unsigned int capabilities; /* Device capabilities */ |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d3ca0dac1111..fd93566345b6 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -26,6 +26,12 @@ struct backing_dev_info default_backing_dev_info = { | |||
26 | EXPORT_SYMBOL_GPL(default_backing_dev_info); | 26 | EXPORT_SYMBOL_GPL(default_backing_dev_info); |
27 | 27 | ||
28 | static struct class *bdi_class; | 28 | static struct class *bdi_class; |
29 | |||
30 | /* | ||
31 | * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as | ||
32 | * reader side protection for bdi_pending_list. bdi_list has RCU reader side | ||
33 | * locking. | ||
34 | */ | ||
29 | DEFINE_SPINLOCK(bdi_lock); | 35 | DEFINE_SPINLOCK(bdi_lock); |
30 | LIST_HEAD(bdi_list); | 36 | LIST_HEAD(bdi_list); |
31 | LIST_HEAD(bdi_pending_list); | 37 | LIST_HEAD(bdi_pending_list); |
@@ -284,9 +290,9 @@ static int bdi_start_fn(void *ptr) | |||
284 | /* | 290 | /* |
285 | * Add us to the active bdi_list | 291 | * Add us to the active bdi_list |
286 | */ | 292 | */ |
287 | spin_lock(&bdi_lock); | 293 | spin_lock_bh(&bdi_lock); |
288 | list_add(&bdi->bdi_list, &bdi_list); | 294 | list_add_rcu(&bdi->bdi_list, &bdi_list); |
289 | spin_unlock(&bdi_lock); | 295 | spin_unlock_bh(&bdi_lock); |
290 | 296 | ||
291 | bdi_task_init(bdi, wb); | 297 | bdi_task_init(bdi, wb); |
292 | 298 | ||
@@ -389,7 +395,7 @@ static int bdi_forker_task(void *ptr) | |||
389 | if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) | 395 | if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) |
390 | wb_do_writeback(me, 0); | 396 | wb_do_writeback(me, 0); |
391 | 397 | ||
392 | spin_lock(&bdi_lock); | 398 | spin_lock_bh(&bdi_lock); |
393 | 399 | ||
394 | /* | 400 | /* |
395 | * Check if any existing bdi's have dirty data without | 401 | * Check if any existing bdi's have dirty data without |
@@ -410,7 +416,7 @@ static int bdi_forker_task(void *ptr) | |||
410 | if (list_empty(&bdi_pending_list)) { | 416 | if (list_empty(&bdi_pending_list)) { |
411 | unsigned long wait; | 417 | unsigned long wait; |
412 | 418 | ||
413 | spin_unlock(&bdi_lock); | 419 | spin_unlock_bh(&bdi_lock); |
414 | wait = msecs_to_jiffies(dirty_writeback_interval * 10); | 420 | wait = msecs_to_jiffies(dirty_writeback_interval * 10); |
415 | schedule_timeout(wait); | 421 | schedule_timeout(wait); |
416 | try_to_freeze(); | 422 | try_to_freeze(); |
@@ -426,7 +432,7 @@ static int bdi_forker_task(void *ptr) | |||
426 | bdi = list_entry(bdi_pending_list.next, struct backing_dev_info, | 432 | bdi = list_entry(bdi_pending_list.next, struct backing_dev_info, |
427 | bdi_list); | 433 | bdi_list); |
428 | list_del_init(&bdi->bdi_list); | 434 | list_del_init(&bdi->bdi_list); |
429 | spin_unlock(&bdi_lock); | 435 | spin_unlock_bh(&bdi_lock); |
430 | 436 | ||
431 | wb = &bdi->wb; | 437 | wb = &bdi->wb; |
432 | wb->task = kthread_run(bdi_start_fn, wb, "flush-%s", | 438 | wb->task = kthread_run(bdi_start_fn, wb, "flush-%s", |
@@ -445,9 +451,9 @@ static int bdi_forker_task(void *ptr) | |||
445 | * a chance to flush other bdi's to free | 451 | * a chance to flush other bdi's to free |
446 | * memory. | 452 | * memory. |
447 | */ | 453 | */ |
448 | spin_lock(&bdi_lock); | 454 | spin_lock_bh(&bdi_lock); |
449 | list_add_tail(&bdi->bdi_list, &bdi_pending_list); | 455 | list_add_tail(&bdi->bdi_list, &bdi_pending_list); |
450 | spin_unlock(&bdi_lock); | 456 | spin_unlock_bh(&bdi_lock); |
451 | 457 | ||
452 | bdi_flush_io(bdi); | 458 | bdi_flush_io(bdi); |
453 | } | 459 | } |
@@ -456,6 +462,24 @@ static int bdi_forker_task(void *ptr) | |||
456 | return 0; | 462 | return 0; |
457 | } | 463 | } |
458 | 464 | ||
465 | static void bdi_add_to_pending(struct rcu_head *head) | ||
466 | { | ||
467 | struct backing_dev_info *bdi; | ||
468 | |||
469 | bdi = container_of(head, struct backing_dev_info, rcu_head); | ||
470 | INIT_LIST_HEAD(&bdi->bdi_list); | ||
471 | |||
472 | spin_lock(&bdi_lock); | ||
473 | list_add_tail(&bdi->bdi_list, &bdi_pending_list); | ||
474 | spin_unlock(&bdi_lock); | ||
475 | |||
476 | /* | ||
477 | * We are now on the pending list, wake up bdi_forker_task() | ||
478 | * to finish the job and add us back to the active bdi_list | ||
479 | */ | ||
480 | wake_up_process(default_backing_dev_info.wb.task); | ||
481 | } | ||
482 | |||
459 | /* | 483 | /* |
460 | * Add the default flusher task that gets created for any bdi | 484 | * Add the default flusher task that gets created for any bdi |
461 | * that has dirty data pending writeout | 485 | * that has dirty data pending writeout |
@@ -478,16 +502,29 @@ void static bdi_add_default_flusher_task(struct backing_dev_info *bdi) | |||
478 | * waiting for previous additions to finish. | 502 | * waiting for previous additions to finish. |
479 | */ | 503 | */ |
480 | if (!test_and_set_bit(BDI_pending, &bdi->state)) { | 504 | if (!test_and_set_bit(BDI_pending, &bdi->state)) { |
481 | list_move_tail(&bdi->bdi_list, &bdi_pending_list); | 505 | list_del_rcu(&bdi->bdi_list); |
482 | 506 | ||
483 | /* | 507 | /* |
484 | * We are now on the pending list, wake up bdi_forker_task() | 508 | * We must wait for the current RCU period to end before |
485 | * to finish the job and add us back to the active bdi_list | 509 | * moving to the pending list. So schedule that operation |
510 | * from an RCU callback. | ||
486 | */ | 511 | */ |
487 | wake_up_process(default_backing_dev_info.wb.task); | 512 | call_rcu(&bdi->rcu_head, bdi_add_to_pending); |
488 | } | 513 | } |
489 | } | 514 | } |
490 | 515 | ||
516 | /* | ||
517 | * Remove bdi from bdi_list, and ensure that it is no longer visible | ||
518 | */ | ||
519 | static void bdi_remove_from_list(struct backing_dev_info *bdi) | ||
520 | { | ||
521 | spin_lock_bh(&bdi_lock); | ||
522 | list_del_rcu(&bdi->bdi_list); | ||
523 | spin_unlock_bh(&bdi_lock); | ||
524 | |||
525 | synchronize_rcu(); | ||
526 | } | ||
527 | |||
491 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, | 528 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, |
492 | const char *fmt, ...) | 529 | const char *fmt, ...) |
493 | { | 530 | { |
@@ -506,9 +543,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, | |||
506 | goto exit; | 543 | goto exit; |
507 | } | 544 | } |
508 | 545 | ||
509 | spin_lock(&bdi_lock); | 546 | spin_lock_bh(&bdi_lock); |
510 | list_add_tail(&bdi->bdi_list, &bdi_list); | 547 | list_add_tail_rcu(&bdi->bdi_list, &bdi_list); |
511 | spin_unlock(&bdi_lock); | 548 | spin_unlock_bh(&bdi_lock); |
512 | 549 | ||
513 | bdi->dev = dev; | 550 | bdi->dev = dev; |
514 | 551 | ||
@@ -526,9 +563,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, | |||
526 | wb->task = NULL; | 563 | wb->task = NULL; |
527 | ret = -ENOMEM; | 564 | ret = -ENOMEM; |
528 | 565 | ||
529 | spin_lock(&bdi_lock); | 566 | bdi_remove_from_list(bdi); |
530 | list_del(&bdi->bdi_list); | ||
531 | spin_unlock(&bdi_lock); | ||
532 | goto exit; | 567 | goto exit; |
533 | } | 568 | } |
534 | } | 569 | } |
@@ -565,9 +600,7 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) | |||
565 | /* | 600 | /* |
566 | * Make sure nobody finds us on the bdi_list anymore | 601 | * Make sure nobody finds us on the bdi_list anymore |
567 | */ | 602 | */ |
568 | spin_lock(&bdi_lock); | 603 | bdi_remove_from_list(bdi); |
569 | list_del(&bdi->bdi_list); | ||
570 | spin_unlock(&bdi_lock); | ||
571 | 604 | ||
572 | /* | 605 | /* |
573 | * Finally, kill the kernel threads. We don't need to be RCU | 606 | * Finally, kill the kernel threads. We don't need to be RCU |
@@ -599,6 +632,7 @@ int bdi_init(struct backing_dev_info *bdi) | |||
599 | bdi->max_ratio = 100; | 632 | bdi->max_ratio = 100; |
600 | bdi->max_prop_frac = PROP_FRAC_BASE; | 633 | bdi->max_prop_frac = PROP_FRAC_BASE; |
601 | spin_lock_init(&bdi->wb_lock); | 634 | spin_lock_init(&bdi->wb_lock); |
635 | INIT_RCU_HEAD(&bdi->rcu_head); | ||
602 | INIT_LIST_HEAD(&bdi->bdi_list); | 636 | INIT_LIST_HEAD(&bdi->bdi_list); |
603 | INIT_LIST_HEAD(&bdi->wb_list); | 637 | INIT_LIST_HEAD(&bdi->wb_list); |
604 | INIT_LIST_HEAD(&bdi->work_list); | 638 | INIT_LIST_HEAD(&bdi->work_list); |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index abc648f5de00..12c3d843ce93 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -315,7 +315,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | |||
315 | { | 315 | { |
316 | int ret = 0; | 316 | int ret = 0; |
317 | 317 | ||
318 | spin_lock(&bdi_lock); | 318 | spin_lock_bh(&bdi_lock); |
319 | if (min_ratio > bdi->max_ratio) { | 319 | if (min_ratio > bdi->max_ratio) { |
320 | ret = -EINVAL; | 320 | ret = -EINVAL; |
321 | } else { | 321 | } else { |
@@ -327,7 +327,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | |||
327 | ret = -EINVAL; | 327 | ret = -EINVAL; |
328 | } | 328 | } |
329 | } | 329 | } |
330 | spin_unlock(&bdi_lock); | 330 | spin_unlock_bh(&bdi_lock); |
331 | 331 | ||
332 | return ret; | 332 | return ret; |
333 | } | 333 | } |
@@ -339,14 +339,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) | |||
339 | if (max_ratio > 100) | 339 | if (max_ratio > 100) |
340 | return -EINVAL; | 340 | return -EINVAL; |
341 | 341 | ||
342 | spin_lock(&bdi_lock); | 342 | spin_lock_bh(&bdi_lock); |
343 | if (bdi->min_ratio > max_ratio) { | 343 | if (bdi->min_ratio > max_ratio) { |
344 | ret = -EINVAL; | 344 | ret = -EINVAL; |
345 | } else { | 345 | } else { |
346 | bdi->max_ratio = max_ratio; | 346 | bdi->max_ratio = max_ratio; |
347 | bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; | 347 | bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; |
348 | } | 348 | } |
349 | spin_unlock(&bdi_lock); | 349 | spin_unlock_bh(&bdi_lock); |
350 | 350 | ||
351 | return ret; | 351 | return ret; |
352 | } | 352 | } |