Diffstat (limited to 'mm/backing-dev.c')

 -rw-r--r--  mm/backing-dev.c  186
 1 file changed, 93 insertions(+), 93 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c6f2a37028c2..f028a9a472fd 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -12,8 +12,6 @@
 #include <linux/device.h>
 #include <trace/events/writeback.h>
 
-static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
-
 struct backing_dev_info noop_backing_dev_info = {
         .name           = "noop",
         .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
@@ -242,6 +240,8 @@ static __init int bdi_class_init(void)
 }
 postcore_initcall(bdi_class_init);
 
+static int bdi_init(struct backing_dev_info *bdi);
+
 static int __init default_bdi_init(void)
 {
         int err;
@@ -294,6 +294,8 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 
         memset(wb, 0, sizeof(*wb));
 
+        if (wb != &bdi->wb)
+                bdi_get(bdi);
         wb->bdi = bdi;
         wb->last_old_flush = jiffies;
         INIT_LIST_HEAD(&wb->b_dirty);
@@ -314,8 +316,10 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
         wb->dirty_sleep = jiffies;
 
         wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
-        if (!wb->congested)
-                return -ENOMEM;
+        if (!wb->congested) {
+                err = -ENOMEM;
+                goto out_put_bdi;
+        }
 
         err = fprop_local_init_percpu(&wb->completions, gfp);
         if (err)
@@ -335,9 +339,14 @@ out_destroy_stat:
         fprop_local_destroy_percpu(&wb->completions);
 out_put_cong:
         wb_congested_put(wb->congested);
+out_put_bdi:
+        if (wb != &bdi->wb)
+                bdi_put(bdi);
         return err;
 }
 
+static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb);
+
 /*
  * Remove bdi from the global list and shutdown any threads we have running
  */
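
After this hunk, every bdi_writeback except the one embedded in the bdi itself pins the backing_dev_info with bdi_get(), so wb_init()'s failure path has to drop that pin again, hence the new out_put_bdi label. The shape is the standard goto-unwind idiom. A minimal userspace sketch of the same idiom, with hypothetical acquire_a()/acquire_b() helpers standing in for wb_congested_get_create() and fprop_local_init_percpu():

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for the resources wb_init() acquires. */
static void *acquire_a(void) { return malloc(1); }
static void *acquire_b(void) { return NULL; /* force the error path */ }
static void release_a(void *a) { free(a); }

static int obj_init(int holds_ref, int *refcnt)
{
        void *a, *b;
        int err;

        if (holds_ref)
                (*refcnt)++;            /* like bdi_get() for a non-embedded wb */

        a = acquire_a();                /* like wb_congested_get_create() */
        if (!a) {
                err = -1;
                goto out_put_ref;
        }

        b = acquire_b();                /* like fprop_local_init_percpu() */
        if (!b) {
                err = -2;
                goto out_release_a;
        }

        return 0;                       /* success: the object keeps a and b */

out_release_a:
        release_a(a);
out_put_ref:                            /* like the new out_put_bdi: label */
        if (holds_ref)
                (*refcnt)--;
        return err;
}

int main(void)
{
        int refcnt = 1;

        printf("err=%d refcnt=%d\n", obj_init(1, &refcnt), refcnt);
        return 0;
}

Releasing in reverse order of acquisition keeps each label responsible for exactly the resources acquired before the jump to it.
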
@@ -347,10 +356,18 @@ static void wb_shutdown(struct bdi_writeback *wb)
         spin_lock_bh(&wb->work_lock);
         if (!test_and_clear_bit(WB_registered, &wb->state)) {
                 spin_unlock_bh(&wb->work_lock);
+                /*
+                 * Wait for wb shutdown to finish if someone else is just
+                 * running wb_shutdown(). Otherwise we could proceed to wb /
+                 * bdi destruction before wb_shutdown() is finished.
+                 */
+                wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE);
                 return;
         }
+        set_bit(WB_shutting_down, &wb->state);
         spin_unlock_bh(&wb->work_lock);
 
+        cgwb_remove_from_bdi_list(wb);
         /*
          * Drain work list and shutdown the delayed_work. !WB_registered
          * tells wb_workfn() that @wb is dying and its work_list needs to
@@ -359,6 +376,12 @@ static void wb_shutdown(struct bdi_writeback *wb)
         mod_delayed_work(bdi_wq, &wb->dwork, 0);
         flush_delayed_work(&wb->dwork);
         WARN_ON(!list_empty(&wb->work_list));
+        /*
+         * Make sure bit gets cleared after shutdown is finished. Matches with
+         * the barrier provided by test_and_clear_bit() above.
+         */
+        smp_wmb();
+        clear_bit(WB_shutting_down, &wb->state);
 }
 
 static void wb_exit(struct bdi_writeback *wb)
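
Taken together, the two wb_shutdown() hunks close a race between concurrent shutdowns: the first caller clears WB_registered and sets WB_shutting_down; a second caller finds WB_registered already clear and blocks in wait_on_bit() until the winner drops WB_shutting_down, so neither returns while teardown is still in flight. A rough pthread analogue of that exclusion; the mutex and condvar are this sketch's stand-ins for the kernel's bit operations and bit waitqueues, not what the patch itself uses:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool registered = true;          /* like WB_registered */
static bool shutting_down;              /* like WB_shutting_down */

static void shutdown_obj(void)
{
        pthread_mutex_lock(&lock);
        if (!registered) {
                /* Someone else is (or was) shutting down; wait it out. */
                while (shutting_down)
                        pthread_cond_wait(&cond, &lock);
                pthread_mutex_unlock(&lock);
                return;
        }
        registered = false;
        shutting_down = true;
        pthread_mutex_unlock(&lock);

        puts("draining work...");       /* like flush_delayed_work() */

        pthread_mutex_lock(&lock);
        shutting_down = false;          /* like clear_bit(WB_shutting_down) */
        pthread_cond_broadcast(&cond);
        pthread_mutex_unlock(&lock);
}

static void *thread_fn(void *arg)
{
        (void)arg;
        shutdown_obj();
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, thread_fn, NULL);
        shutdown_obj();
        pthread_join(t, NULL);
        return 0;
}
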
@@ -372,6 +395,8 @@ static void wb_exit(struct bdi_writeback *wb)
 
         fprop_local_destroy_percpu(&wb->completions);
         wb_congested_put(wb->congested);
+        if (wb != &wb->bdi->wb)
+                bdi_put(wb->bdi);
 }
 
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -381,11 +406,9 @@ static void wb_exit(struct bdi_writeback *wb)
 /*
  * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
  * blkcg->cgwb_list, and memcg->cgwb_list. bdi->cgwb_tree is also RCU
- * protected. cgwb_release_wait is used to wait for the completion of cgwb
- * releases from bdi destruction path.
+ * protected.
  */
 static DEFINE_SPINLOCK(cgwb_lock);
-static DECLARE_WAIT_QUEUE_HEAD(cgwb_release_wait);
 
 /**
  * wb_congested_get_create - get or create a wb_congested
@@ -438,7 +461,7 @@ retry:
                 return NULL;
 
         atomic_set(&new_congested->refcnt, 0);
-        new_congested->bdi = bdi;
+        new_congested->__bdi = bdi;
         new_congested->blkcg_id = blkcg_id;
         goto retry;
 
@@ -466,10 +489,10 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
         }
 
         /* bdi might already have been destroyed leaving @congested unlinked */
-        if (congested->bdi) {
+        if (congested->__bdi) {
                 rb_erase(&congested->rb_node,
-                         &congested->bdi->cgwb_congested_tree);
-                congested->bdi = NULL;
+                         &congested->__bdi->cgwb_congested_tree);
+                congested->__bdi = NULL;
         }
 
         spin_unlock_irqrestore(&cgwb_lock, flags);
@@ -480,11 +503,6 @@ static void cgwb_release_workfn(struct work_struct *work)
 {
         struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
                                                 release_work);
-        struct backing_dev_info *bdi = wb->bdi;
-
-        spin_lock_irq(&cgwb_lock);
-        list_del_rcu(&wb->bdi_node);
-        spin_unlock_irq(&cgwb_lock);
 
         wb_shutdown(wb);
 
@@ -495,9 +513,6 @@ static void cgwb_release_workfn(struct work_struct *work)
         percpu_ref_exit(&wb->refcnt);
         wb_exit(wb);
         kfree_rcu(wb, rcu);
-
-        if (atomic_dec_and_test(&bdi->usage_cnt))
-                wake_up_all(&cgwb_release_wait);
 }
 
 static void cgwb_release(struct percpu_ref *refcnt)
@@ -517,6 +532,13 @@ static void cgwb_kill(struct bdi_writeback *wb)
         percpu_ref_kill(&wb->refcnt);
 }
 
+static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
+{
+        spin_lock_irq(&cgwb_lock);
+        list_del_rcu(&wb->bdi_node);
+        spin_unlock_irq(&cgwb_lock);
+}
+
 static int cgwb_create(struct backing_dev_info *bdi,
                        struct cgroup_subsys_state *memcg_css, gfp_t gfp)
 {
@@ -580,7 +602,6 @@ static int cgwb_create(struct backing_dev_info *bdi,
         /* we might have raced another instance of this function */
         ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
         if (!ret) {
-                atomic_inc(&bdi->usage_cnt);
                 list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
                 list_add(&wb->memcg_node, memcg_cgwb_list);
                 list_add(&wb->blkcg_node, blkcg_cgwb_list);
@@ -670,7 +691,6 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 
         INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
         bdi->cgwb_congested_tree = RB_ROOT;
-        atomic_set(&bdi->usage_cnt, 1);
 
         ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
         if (!ret) {
@@ -680,29 +700,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
         return ret;
 }
 
-static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
+static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
 {
         struct radix_tree_iter iter;
         void **slot;
+        struct bdi_writeback *wb;
 
         WARN_ON(test_bit(WB_registered, &bdi->wb.state));
 
         spin_lock_irq(&cgwb_lock);
         radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
                 cgwb_kill(*slot);
-        spin_unlock_irq(&cgwb_lock);
 
-        /*
-         * All cgwb's must be shutdown and released before returning. Drain
-         * the usage counter to wait for all cgwb's ever created on @bdi.
-         */
-        atomic_dec(&bdi->usage_cnt);
-        wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt));
-        /*
-         * Grab back our reference so that we hold it when @bdi gets
-         * re-registered.
-         */
-        atomic_inc(&bdi->usage_cnt);
+        while (!list_empty(&bdi->wb_list)) {
+                wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
+                                      bdi_node);
+                spin_unlock_irq(&cgwb_lock);
+                wb_shutdown(wb);
+                spin_lock_irq(&cgwb_lock);
+        }
+        spin_unlock_irq(&cgwb_lock);
 }
 
 /**
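
cgwb_bdi_unregister() now drains bdi->wb_list directly instead of sleeping on a usage counter: while the list is non-empty, peek the first wb, drop cgwb_lock (wb_shutdown() sleeps, so it cannot run under a spinlock), shut it down, retake the lock, and re-test. Progress is guaranteed because wb_shutdown() unlinks the entry itself via cgwb_remove_from_bdi_list(). A compact userspace sketch of this drop-the-lock-and-retest drain, assuming a toy singly linked list and a plain mutex in place of cgwb_lock:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; int id; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;

/* Sleeps in real life, so it must run without list_lock held.
 * Like wb_shutdown(), it also unlinks the entry itself. */
static void shutdown_one(struct node *n)
{
        printf("shutting down %d\n", n->id);
        pthread_mutex_lock(&list_lock);
        head = n->next;                 /* like cgwb_remove_from_bdi_list() */
        pthread_mutex_unlock(&list_lock);
        free(n);
}

static void drain_all(void)
{
        pthread_mutex_lock(&list_lock);
        while (head) {
                struct node *n = head;

                /* Drop the lock across the blocking shutdown, then
                 * re-check the list from scratch, as the new
                 * cgwb_bdi_unregister() does with cgwb_lock. */
                pthread_mutex_unlock(&list_lock);
                shutdown_one(n);
                pthread_mutex_lock(&list_lock);
        }
        pthread_mutex_unlock(&list_lock);
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));
                n->id = i;
                n->next = head;
                head = n;
        }
        drain_all();
        return 0;
}
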
@@ -752,11 +769,18 @@ static void cgwb_bdi_exit(struct backing_dev_info *bdi)
                         rb_entry(rbn, struct bdi_writeback_congested, rb_node);
 
                 rb_erase(rbn, &bdi->cgwb_congested_tree);
-                congested->bdi = NULL;          /* mark @congested unlinked */
+                congested->__bdi = NULL;        /* mark @congested unlinked */
         }
         spin_unlock_irq(&cgwb_lock);
 }
 
+static void cgwb_bdi_register(struct backing_dev_info *bdi)
+{
+        spin_lock_irq(&cgwb_lock);
+        list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
+        spin_unlock_irq(&cgwb_lock);
+}
+
 #else   /* CONFIG_CGROUP_WRITEBACK */
 
 static int cgwb_bdi_init(struct backing_dev_info *bdi)
@@ -777,16 +801,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
         return 0;
 }
 
-static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
+static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { }
 
 static void cgwb_bdi_exit(struct backing_dev_info *bdi)
 {
         wb_congested_put(bdi->wb_congested);
 }
 
+static void cgwb_bdi_register(struct backing_dev_info *bdi)
+{
+        list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
+}
+
+static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
+{
+        list_del_rcu(&wb->bdi_node);
+}
+
 #endif  /* CONFIG_CGROUP_WRITEBACK */
 
-int bdi_init(struct backing_dev_info *bdi)
+static int bdi_init(struct backing_dev_info *bdi)
 {
         int ret;
 
@@ -802,11 +836,8 @@ int bdi_init(struct backing_dev_info *bdi)
 
         ret = cgwb_bdi_init(bdi);
 
-        list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
-
         return ret;
 }
-EXPORT_SYMBOL(bdi_init);
 
 struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
 {
@@ -823,22 +854,20 @@ struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
         }
         return bdi;
 }
+EXPORT_SYMBOL(bdi_alloc_node);
 
-int bdi_register(struct backing_dev_info *bdi, struct device *parent,
-                const char *fmt, ...)
+int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
 {
-        va_list args;
         struct device *dev;
 
         if (bdi->dev)   /* The driver needs to use separate queues per device */
                 return 0;
 
-        va_start(args, fmt);
-        dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
-        va_end(args);
+        dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
         if (IS_ERR(dev))
                 return PTR_ERR(dev);
 
+        cgwb_bdi_register(bdi);
         bdi->dev = dev;
 
         bdi_debug_register(bdi, dev_name(dev));
@@ -851,20 +880,25 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
         trace_writeback_bdi_register(bdi);
         return 0;
 }
-EXPORT_SYMBOL(bdi_register);
+EXPORT_SYMBOL(bdi_register_va);
 
-int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
+int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...)
 {
-        return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
+        va_list args;
+        int ret;
+
+        va_start(args, fmt);
+        ret = bdi_register_va(bdi, fmt, args);
+        va_end(args);
+        return ret;
 }
-EXPORT_SYMBOL(bdi_register_dev);
+EXPORT_SYMBOL(bdi_register);
 
 int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
 {
         int rc;
 
-        rc = bdi_register(bdi, NULL, "%u:%u", MAJOR(owner->devt),
-                        MINOR(owner->devt));
+        rc = bdi_register(bdi, "%u:%u", MAJOR(owner->devt), MINOR(owner->devt));
         if (rc)
                 return rc;
         /* Leaking owner reference... */
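
bdi_register() is now a thin varargs wrapper over bdi_register_va(), the usual C pairing of a `...` entry point with a va_list worker (compare printf()/vprintf()); callers that already hold a va_list can use the _va variant directly. The pattern in isolation, as a self-contained sketch around a hypothetical obj_set_name():

#include <stdarg.h>
#include <stdio.h>

struct obj { char name[32]; };

/* The va_list worker: all the logic lives here, like bdi_register_va(). */
static int obj_set_name_va(struct obj *o, const char *fmt, va_list args)
{
        return vsnprintf(o->name, sizeof(o->name), fmt, args) < 0 ? -1 : 0;
}

/* The varargs wrapper just marshals its arguments, like bdi_register(). */
static int obj_set_name(struct obj *o, const char *fmt, ...)
{
        va_list args;
        int ret;

        va_start(args, fmt);
        ret = obj_set_name_va(o, fmt, args);
        va_end(args);
        return ret;
}

int main(void)
{
        struct obj o;

        obj_set_name(&o, "%u:%u", 8, 0);        /* like bdi_register_owner() */
        puts(o.name);
        return 0;
}
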
@@ -892,7 +926,7 @@ void bdi_unregister(struct backing_dev_info *bdi)
         /* make sure nobody finds us on the bdi_list anymore */
         bdi_remove_from_list(bdi);
         wb_shutdown(&bdi->wb);
-        cgwb_bdi_destroy(bdi);
+        cgwb_bdi_unregister(bdi);
 
         if (bdi->dev) {
                 bdi_debug_unregister(bdi);
@@ -906,19 +940,16 @@ void bdi_unregister(struct backing_dev_info *bdi)
         }
 }
 
-static void bdi_exit(struct backing_dev_info *bdi)
-{
-        WARN_ON_ONCE(bdi->dev);
-        wb_exit(&bdi->wb);
-        cgwb_bdi_exit(bdi);
-}
-
 static void release_bdi(struct kref *ref)
 {
         struct backing_dev_info *bdi =
                         container_of(ref, struct backing_dev_info, refcnt);
 
-        bdi_exit(bdi);
+        if (test_bit(WB_registered, &bdi->wb.state))
+                bdi_unregister(bdi);
+        WARN_ON_ONCE(bdi->dev);
+        wb_exit(&bdi->wb);
+        cgwb_bdi_exit(bdi);
         kfree(bdi);
 }
 
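
release_bdi() absorbs the old bdi_exit() and gains a lazy unregister: if the last reference is dropped on a bdi that is still registered, release_bdi() calls bdi_unregister() itself before freeing. That is what allows bdi_destroy() to be deleted in the final hunk; the last bdi_put() does the whole job. A minimal single-threaded sketch of this release-on-last-put shape, with a plain counter standing in for struct kref:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
        int refcnt;
        bool registered;
};

static void obj_unregister(struct obj *o)
{
        puts("unregistering");
        o->registered = false;
}

/* Runs when the last reference is dropped, like release_bdi(). */
static void obj_release(struct obj *o)
{
        if (o->registered)              /* like the WB_registered test */
                obj_unregister(o);
        puts("freeing");
        free(o);
}

static void obj_put(struct obj *o)
{
        if (--o->refcnt == 0)           /* kref_put() analogue, single-threaded */
                obj_release(o);
}

int main(void)
{
        struct obj *o = calloc(1, sizeof(*o));

        o->refcnt = 1;
        o->registered = true;
        obj_put(o);     /* last put: unregister happens lazily here */
        return 0;
}
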
@@ -926,38 +957,7 @@ void bdi_put(struct backing_dev_info *bdi)
 {
         kref_put(&bdi->refcnt, release_bdi);
 }
-
-void bdi_destroy(struct backing_dev_info *bdi)
-{
-        bdi_unregister(bdi);
-        bdi_exit(bdi);
-}
-EXPORT_SYMBOL(bdi_destroy);
-
-/*
- * For use from filesystems to quickly init and register a bdi associated
- * with dirty writeback
- */
-int bdi_setup_and_register(struct backing_dev_info *bdi, char *name)
-{
-        int err;
-
-        bdi->name = name;
-        bdi->capabilities = 0;
-        err = bdi_init(bdi);
-        if (err)
-                return err;
-
-        err = bdi_register(bdi, NULL, "%.28s-%ld", name,
-                                atomic_long_inc_return(&bdi_seq));
-        if (err) {
-                bdi_destroy(bdi);
-                return err;
-        }
-
-        return 0;
-}
-EXPORT_SYMBOL(bdi_setup_and_register);
+EXPORT_SYMBOL(bdi_put);
 
 static wait_queue_head_t congestion_wqh[2] = {
         __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
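
With bdi_setup_and_register() and bdi_destroy() gone, a user allocates a refcounted bdi, registers it by name, and drops its reference when finished; the final bdi_put() unregisters lazily if needed. A hedged sketch of what a caller looks like after this patch, built only from the functions exported above; the "myfs" name and MY_MAJOR/MY_MINOR are made up for illustration, and this is kernel-side code, not a standalone program:

#include <linux/backing-dev.h>
#include <linux/gfp.h>

#define MY_MAJOR 259    /* hypothetical; a real driver uses its own dev_t */
#define MY_MINOR 0

static struct backing_dev_info *mybdi;

static int myfs_setup_bdi(void)
{
        int err;

        mybdi = bdi_alloc_node(GFP_KERNEL, NUMA_NO_NODE);
        if (!mybdi)
                return -ENOMEM;

        err = bdi_register(mybdi, "myfs-%u:%u", MY_MAJOR, MY_MINOR);
        if (err) {
                bdi_put(mybdi);         /* release_bdi() tears it down */
                return err;
        }
        return 0;
}

static void myfs_teardown_bdi(void)
{
        /*
         * An explicit bdi_unregister() here is optional: if skipped, the
         * final bdi_put() sees WB_registered set and unregisters lazily.
         */
        bdi_put(mybdi);
}
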