diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-16 10:45:38 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-16 10:45:38 -0400 |
commit | a3eb51ecfa1d7be689f05c9f12cb0fcf862c516e (patch) | |
tree | 9995b3b9207c18a58ce99700f569b7bc2877e23c /mm | |
parent | fdaa45e95d2ef59a140d2fb2e487141f83f5a07c (diff) | |
parent | 1ef7d9aa32a8ee054c4d4fdcd2ea537c04d61b2f (diff) |
Merge branch 'writeback' of git://git.kernel.dk/linux-2.6-block
* 'writeback' of git://git.kernel.dk/linux-2.6-block:
writeback: fix possible bdi writeback refcounting problem
writeback: Fix bdi use after free in wb_work_complete()
writeback: improve scalability of bdi writeback work queues
writeback: remove smp_mb(), it's not needed with list_add_tail_rcu()
writeback: use schedule_timeout_interruptible()
writeback: add comments to bdi_work structure
writeback: splice dirty inode entries to default bdi on bdi_destroy()
writeback: separate starting of sync vs opportunistic writeback
writeback: inline allocation failure handling in bdi_alloc_queue_work()
writeback: use RCU to protect bdi_list
writeback: only use bdi_writeback_all() for WB_SYNC_NONE writeout
fs: Assign bdi in super_block
writeback: make wb_writeback() take an argument structure
writeback: merely wakeup flusher thread if work allocation fails for WB_SYNC_NONE
writeback: get rid of wbc->for_writepages
fs: remove bdev->bd_inode_backing_dev_info
Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 90 | ||||
-rw-r--r-- | mm/page-writeback.c | 22 |
2 files changed, 74 insertions, 38 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d3ca0dac1111..3d3accb1f800 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -26,6 +26,12 @@ struct backing_dev_info default_backing_dev_info = { | |||
26 | EXPORT_SYMBOL_GPL(default_backing_dev_info); | 26 | EXPORT_SYMBOL_GPL(default_backing_dev_info); |
27 | 27 | ||
28 | static struct class *bdi_class; | 28 | static struct class *bdi_class; |
29 | |||
30 | /* | ||
31 | * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as | ||
32 | * reader side protection for bdi_pending_list. bdi_list has RCU reader side | ||
33 | * locking. | ||
34 | */ | ||
29 | DEFINE_SPINLOCK(bdi_lock); | 35 | DEFINE_SPINLOCK(bdi_lock); |
30 | LIST_HEAD(bdi_list); | 36 | LIST_HEAD(bdi_list); |
31 | LIST_HEAD(bdi_pending_list); | 37 | LIST_HEAD(bdi_pending_list); |
@@ -284,9 +290,9 @@ static int bdi_start_fn(void *ptr) | |||
284 | /* | 290 | /* |
285 | * Add us to the active bdi_list | 291 | * Add us to the active bdi_list |
286 | */ | 292 | */ |
287 | spin_lock(&bdi_lock); | 293 | spin_lock_bh(&bdi_lock); |
288 | list_add(&bdi->bdi_list, &bdi_list); | 294 | list_add_rcu(&bdi->bdi_list, &bdi_list); |
289 | spin_unlock(&bdi_lock); | 295 | spin_unlock_bh(&bdi_lock); |
290 | 296 | ||
291 | bdi_task_init(bdi, wb); | 297 | bdi_task_init(bdi, wb); |
292 | 298 | ||
@@ -389,7 +395,7 @@ static int bdi_forker_task(void *ptr) | |||
389 | if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) | 395 | if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) |
390 | wb_do_writeback(me, 0); | 396 | wb_do_writeback(me, 0); |
391 | 397 | ||
392 | spin_lock(&bdi_lock); | 398 | spin_lock_bh(&bdi_lock); |
393 | 399 | ||
394 | /* | 400 | /* |
395 | * Check if any existing bdi's have dirty data without | 401 | * Check if any existing bdi's have dirty data without |
@@ -410,7 +416,7 @@ static int bdi_forker_task(void *ptr) | |||
410 | if (list_empty(&bdi_pending_list)) { | 416 | if (list_empty(&bdi_pending_list)) { |
411 | unsigned long wait; | 417 | unsigned long wait; |
412 | 418 | ||
413 | spin_unlock(&bdi_lock); | 419 | spin_unlock_bh(&bdi_lock); |
414 | wait = msecs_to_jiffies(dirty_writeback_interval * 10); | 420 | wait = msecs_to_jiffies(dirty_writeback_interval * 10); |
415 | schedule_timeout(wait); | 421 | schedule_timeout(wait); |
416 | try_to_freeze(); | 422 | try_to_freeze(); |
@@ -426,7 +432,7 @@ static int bdi_forker_task(void *ptr) | |||
426 | bdi = list_entry(bdi_pending_list.next, struct backing_dev_info, | 432 | bdi = list_entry(bdi_pending_list.next, struct backing_dev_info, |
427 | bdi_list); | 433 | bdi_list); |
428 | list_del_init(&bdi->bdi_list); | 434 | list_del_init(&bdi->bdi_list); |
429 | spin_unlock(&bdi_lock); | 435 | spin_unlock_bh(&bdi_lock); |
430 | 436 | ||
431 | wb = &bdi->wb; | 437 | wb = &bdi->wb; |
432 | wb->task = kthread_run(bdi_start_fn, wb, "flush-%s", | 438 | wb->task = kthread_run(bdi_start_fn, wb, "flush-%s", |
@@ -445,9 +451,9 @@ static int bdi_forker_task(void *ptr) | |||
445 | * a chance to flush other bdi's to free | 451 | * a chance to flush other bdi's to free |
446 | * memory. | 452 | * memory. |
447 | */ | 453 | */ |
448 | spin_lock(&bdi_lock); | 454 | spin_lock_bh(&bdi_lock); |
449 | list_add_tail(&bdi->bdi_list, &bdi_pending_list); | 455 | list_add_tail(&bdi->bdi_list, &bdi_pending_list); |
450 | spin_unlock(&bdi_lock); | 456 | spin_unlock_bh(&bdi_lock); |
451 | 457 | ||
452 | bdi_flush_io(bdi); | 458 | bdi_flush_io(bdi); |
453 | } | 459 | } |
@@ -456,6 +462,24 @@ static int bdi_forker_task(void *ptr) | |||
456 | return 0; | 462 | return 0; |
457 | } | 463 | } |
458 | 464 | ||
465 | static void bdi_add_to_pending(struct rcu_head *head) | ||
466 | { | ||
467 | struct backing_dev_info *bdi; | ||
468 | |||
469 | bdi = container_of(head, struct backing_dev_info, rcu_head); | ||
470 | INIT_LIST_HEAD(&bdi->bdi_list); | ||
471 | |||
472 | spin_lock(&bdi_lock); | ||
473 | list_add_tail(&bdi->bdi_list, &bdi_pending_list); | ||
474 | spin_unlock(&bdi_lock); | ||
475 | |||
476 | /* | ||
477 | * We are now on the pending list, wake up bdi_forker_task() | ||
478 | * to finish the job and add us back to the active bdi_list | ||
479 | */ | ||
480 | wake_up_process(default_backing_dev_info.wb.task); | ||
481 | } | ||
482 | |||
459 | /* | 483 | /* |
460 | * Add the default flusher task that gets created for any bdi | 484 | * Add the default flusher task that gets created for any bdi |
461 | * that has dirty data pending writeout | 485 | * that has dirty data pending writeout |
@@ -478,16 +502,29 @@ void static bdi_add_default_flusher_task(struct backing_dev_info *bdi) | |||
478 | * waiting for previous additions to finish. | 502 | * waiting for previous additions to finish. |
479 | */ | 503 | */ |
480 | if (!test_and_set_bit(BDI_pending, &bdi->state)) { | 504 | if (!test_and_set_bit(BDI_pending, &bdi->state)) { |
481 | list_move_tail(&bdi->bdi_list, &bdi_pending_list); | 505 | list_del_rcu(&bdi->bdi_list); |
482 | 506 | ||
483 | /* | 507 | /* |
484 | * We are now on the pending list, wake up bdi_forker_task() | 508 | * We must wait for the current RCU period to end before |
485 | * to finish the job and add us back to the active bdi_list | 509 | * moving to the pending list. So schedule that operation |
510 | * from an RCU callback. | ||
486 | */ | 511 | */ |
487 | wake_up_process(default_backing_dev_info.wb.task); | 512 | call_rcu(&bdi->rcu_head, bdi_add_to_pending); |
488 | } | 513 | } |
489 | } | 514 | } |
490 | 515 | ||
516 | /* | ||
517 | * Remove bdi from bdi_list, and ensure that it is no longer visible | ||
518 | */ | ||
519 | static void bdi_remove_from_list(struct backing_dev_info *bdi) | ||
520 | { | ||
521 | spin_lock_bh(&bdi_lock); | ||
522 | list_del_rcu(&bdi->bdi_list); | ||
523 | spin_unlock_bh(&bdi_lock); | ||
524 | |||
525 | synchronize_rcu(); | ||
526 | } | ||
527 | |||
491 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, | 528 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, |
492 | const char *fmt, ...) | 529 | const char *fmt, ...) |
493 | { | 530 | { |
@@ -506,9 +543,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, | |||
506 | goto exit; | 543 | goto exit; |
507 | } | 544 | } |
508 | 545 | ||
509 | spin_lock(&bdi_lock); | 546 | spin_lock_bh(&bdi_lock); |
510 | list_add_tail(&bdi->bdi_list, &bdi_list); | 547 | list_add_tail_rcu(&bdi->bdi_list, &bdi_list); |
511 | spin_unlock(&bdi_lock); | 548 | spin_unlock_bh(&bdi_lock); |
512 | 549 | ||
513 | bdi->dev = dev; | 550 | bdi->dev = dev; |
514 | 551 | ||
@@ -526,9 +563,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, | |||
526 | wb->task = NULL; | 563 | wb->task = NULL; |
527 | ret = -ENOMEM; | 564 | ret = -ENOMEM; |
528 | 565 | ||
529 | spin_lock(&bdi_lock); | 566 | bdi_remove_from_list(bdi); |
530 | list_del(&bdi->bdi_list); | ||
531 | spin_unlock(&bdi_lock); | ||
532 | goto exit; | 567 | goto exit; |
533 | } | 568 | } |
534 | } | 569 | } |
@@ -565,9 +600,7 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) | |||
565 | /* | 600 | /* |
566 | * Make sure nobody finds us on the bdi_list anymore | 601 | * Make sure nobody finds us on the bdi_list anymore |
567 | */ | 602 | */ |
568 | spin_lock(&bdi_lock); | 603 | bdi_remove_from_list(bdi); |
569 | list_del(&bdi->bdi_list); | ||
570 | spin_unlock(&bdi_lock); | ||
571 | 604 | ||
572 | /* | 605 | /* |
573 | * Finally, kill the kernel threads. We don't need to be RCU | 606 | * Finally, kill the kernel threads. We don't need to be RCU |
@@ -599,6 +632,7 @@ int bdi_init(struct backing_dev_info *bdi) | |||
599 | bdi->max_ratio = 100; | 632 | bdi->max_ratio = 100; |
600 | bdi->max_prop_frac = PROP_FRAC_BASE; | 633 | bdi->max_prop_frac = PROP_FRAC_BASE; |
601 | spin_lock_init(&bdi->wb_lock); | 634 | spin_lock_init(&bdi->wb_lock); |
635 | INIT_RCU_HEAD(&bdi->rcu_head); | ||
602 | INIT_LIST_HEAD(&bdi->bdi_list); | 636 | INIT_LIST_HEAD(&bdi->bdi_list); |
603 | INIT_LIST_HEAD(&bdi->wb_list); | 637 | INIT_LIST_HEAD(&bdi->wb_list); |
604 | INIT_LIST_HEAD(&bdi->work_list); | 638 | INIT_LIST_HEAD(&bdi->work_list); |
@@ -634,7 +668,19 @@ void bdi_destroy(struct backing_dev_info *bdi) | |||
634 | { | 668 | { |
635 | int i; | 669 | int i; |
636 | 670 | ||
637 | WARN_ON(bdi_has_dirty_io(bdi)); | 671 | /* |
672 | * Splice our entries to the default_backing_dev_info, if this | ||
673 | * bdi disappears | ||
674 | */ | ||
675 | if (bdi_has_dirty_io(bdi)) { | ||
676 | struct bdi_writeback *dst = &default_backing_dev_info.wb; | ||
677 | |||
678 | spin_lock(&inode_lock); | ||
679 | list_splice(&bdi->wb.b_dirty, &dst->b_dirty); | ||
680 | list_splice(&bdi->wb.b_io, &dst->b_io); | ||
681 | list_splice(&bdi->wb.b_more_io, &dst->b_more_io); | ||
682 | spin_unlock(&inode_lock); | ||
683 | } | ||
638 | 684 | ||
639 | bdi_unregister(bdi); | 685 | bdi_unregister(bdi); |
640 | 686 | ||
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index dd73d29c15a8..1eea4fa0d410 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -315,7 +315,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | |||
315 | { | 315 | { |
316 | int ret = 0; | 316 | int ret = 0; |
317 | 317 | ||
318 | spin_lock(&bdi_lock); | 318 | spin_lock_bh(&bdi_lock); |
319 | if (min_ratio > bdi->max_ratio) { | 319 | if (min_ratio > bdi->max_ratio) { |
320 | ret = -EINVAL; | 320 | ret = -EINVAL; |
321 | } else { | 321 | } else { |
@@ -327,7 +327,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | |||
327 | ret = -EINVAL; | 327 | ret = -EINVAL; |
328 | } | 328 | } |
329 | } | 329 | } |
330 | spin_unlock(&bdi_lock); | 330 | spin_unlock_bh(&bdi_lock); |
331 | 331 | ||
332 | return ret; | 332 | return ret; |
333 | } | 333 | } |
@@ -339,14 +339,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) | |||
339 | if (max_ratio > 100) | 339 | if (max_ratio > 100) |
340 | return -EINVAL; | 340 | return -EINVAL; |
341 | 341 | ||
342 | spin_lock(&bdi_lock); | 342 | spin_lock_bh(&bdi_lock); |
343 | if (bdi->min_ratio > max_ratio) { | 343 | if (bdi->min_ratio > max_ratio) { |
344 | ret = -EINVAL; | 344 | ret = -EINVAL; |
345 | } else { | 345 | } else { |
346 | bdi->max_ratio = max_ratio; | 346 | bdi->max_ratio = max_ratio; |
347 | bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; | 347 | bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; |
348 | } | 348 | } |
349 | spin_unlock(&bdi_lock); | 349 | spin_unlock_bh(&bdi_lock); |
350 | 350 | ||
351 | return ret; | 351 | return ret; |
352 | } | 352 | } |
@@ -582,16 +582,8 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
582 | if ((laptop_mode && pages_written) || | 582 | if ((laptop_mode && pages_written) || |
583 | (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY) | 583 | (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY) |
584 | + global_page_state(NR_UNSTABLE_NFS)) | 584 | + global_page_state(NR_UNSTABLE_NFS)) |
585 | > background_thresh))) { | 585 | > background_thresh))) |
586 | struct writeback_control wbc = { | 586 | bdi_start_writeback(bdi, nr_writeback); |
587 | .bdi = bdi, | ||
588 | .sync_mode = WB_SYNC_NONE, | ||
589 | .nr_to_write = nr_writeback, | ||
590 | }; | ||
591 | |||
592 | |||
593 | bdi_start_writeback(&wbc); | ||
594 | } | ||
595 | } | 587 | } |
596 | 588 | ||
597 | void set_page_dirty_balance(struct page *page, int page_mkwrite) | 589 | void set_page_dirty_balance(struct page *page, int page_mkwrite) |
@@ -1020,12 +1012,10 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) | |||
1020 | 1012 | ||
1021 | if (wbc->nr_to_write <= 0) | 1013 | if (wbc->nr_to_write <= 0) |
1022 | return 0; | 1014 | return 0; |
1023 | wbc->for_writepages = 1; | ||
1024 | if (mapping->a_ops->writepages) | 1015 | if (mapping->a_ops->writepages) |
1025 | ret = mapping->a_ops->writepages(mapping, wbc); | 1016 | ret = mapping->a_ops->writepages(mapping, wbc); |
1026 | else | 1017 | else |
1027 | ret = generic_writepages(mapping, wbc); | 1018 | ret = generic_writepages(mapping, wbc); |
1028 | wbc->for_writepages = 0; | ||
1029 | return ret; | 1019 | return ret; |
1030 | } | 1020 | } |
1031 | 1021 | ||