aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-09-13 14:19:52 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-09-13 14:19:52 -0400
commit106f2e59ee3b89a2f93735f65499eae4e8d55abc (patch)
tree113f2892a5ebf0ba2b3be38133b47df2e3795c2d
parent309a18ae360d1d8741c676a37a3daae319fe722a (diff)
parentc94455558337eece474eebb6a16b905f98930418 (diff)
Merge tag 'md/4.8-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD fixes from Shaohua Li: "A few bug fixes for MD: - Guoqing fixed a bug compiling md-cluster in kernel - I fixed a potential deadlock in raid5-cache superblock write, a hang in raid5 reshape resume and a race condition introduced in rc4" * tag 'md/4.8-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: raid5: fix a small race condition md-cluster: make md-cluster also can work when compiled into kernel raid5: guarantee enough stripes to avoid reshape hang raid5-cache: fix a deadlock in superblock write
-rw-r--r--drivers/md/md.c12
-rw-r--r--drivers/md/raid5-cache.c46
-rw-r--r--drivers/md/raid5.c14
3 files changed, 31 insertions, 41 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 67642bacd597..915e84d631a2 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7610,16 +7610,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations);
7610 7610
7611int md_setup_cluster(struct mddev *mddev, int nodes) 7611int md_setup_cluster(struct mddev *mddev, int nodes)
7612{ 7612{
7613 int err; 7613 if (!md_cluster_ops)
7614 7614 request_module("md-cluster");
7615 err = request_module("md-cluster");
7616 if (err) {
7617 pr_err("md-cluster module not found.\n");
7618 return -ENOENT;
7619 }
7620
7621 spin_lock(&pers_lock); 7615 spin_lock(&pers_lock);
7616 /* ensure module won't be unloaded */
7622 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) { 7617 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
7618 pr_err("can't find md-cluster module or get it's reference.\n");
7623 spin_unlock(&pers_lock); 7619 spin_unlock(&pers_lock);
7624 return -ENOENT; 7620 return -ENOENT;
7625 } 7621 }
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 51f76ddbe265..1b1ab4a1d132 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -96,7 +96,6 @@ struct r5l_log {
96 spinlock_t no_space_stripes_lock; 96 spinlock_t no_space_stripes_lock;
97 97
98 bool need_cache_flush; 98 bool need_cache_flush;
99 bool in_teardown;
100}; 99};
101 100
102/* 101/*
@@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
704 703
705 mddev = log->rdev->mddev; 704 mddev = log->rdev->mddev;
706 /* 705 /*
707 * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and 706 * Discard could zero data, so before discard we must make sure
708 * wait for this thread to finish. This thread waits for 707 * superblock is updated to new log tail. Updating superblock (either
709 * MD_CHANGE_PENDING clear, which is supposed to be done in 708 * directly call md_update_sb() or depend on md thread) must hold
710 * md_check_recovery(). md_check_recovery() tries to get 709 * reconfig mutex. On the other hand, raid5_quiesce is called with
711 * reconfig_mutex. Since r5l_quiesce already holds the mutex, 710 * reconfig_mutex hold. The first step of raid5_quiesce() is waitting
712 * md_check_recovery() fails, so the PENDING never get cleared. The 711 * for all IO finish, hence waitting for reclaim thread, while reclaim
713 * in_teardown check workaround this issue. 712 * thread is calling this function and waitting for reconfig mutex. So
713 * there is a deadlock. We workaround this issue with a trylock.
714 * FIXME: we could miss discard if we can't take reconfig mutex
714 */ 715 */
715 if (!log->in_teardown) { 716 set_mask_bits(&mddev->flags, 0,
716 set_mask_bits(&mddev->flags, 0, 717 BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
717 BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); 718 if (!mddev_trylock(mddev))
718 md_wakeup_thread(mddev->thread); 719 return;
719 wait_event(mddev->sb_wait, 720 md_update_sb(mddev, 1);
720 !test_bit(MD_CHANGE_PENDING, &mddev->flags) || 721 mddev_unlock(mddev);
721 log->in_teardown);
722 /*
723 * r5l_quiesce could run after in_teardown check and hold
724 * mutex first. Superblock might get updated twice.
725 */
726 if (log->in_teardown)
727 md_update_sb(mddev, 1);
728 } else {
729 WARN_ON(!mddev_is_locked(mddev));
730 md_update_sb(mddev, 1);
731 }
732 722
733 /* discard IO error really doesn't matter, ignore it */ 723 /* discard IO error really doesn't matter, ignore it */
734 if (log->last_checkpoint < end) { 724 if (log->last_checkpoint < end) {
@@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
827 if (!log || state == 2) 817 if (!log || state == 2)
828 return; 818 return;
829 if (state == 0) { 819 if (state == 0) {
830 log->in_teardown = 0;
831 /* 820 /*
832 * This is a special case for hotadd. In suspend, the array has 821 * This is a special case for hotadd. In suspend, the array has
833 * no journal. In resume, journal is initialized as well as the 822 * no journal. In resume, journal is initialized as well as the
@@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
838 log->reclaim_thread = md_register_thread(r5l_reclaim_thread, 827 log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
839 log->rdev->mddev, "reclaim"); 828 log->rdev->mddev, "reclaim");
840 } else if (state == 1) { 829 } else if (state == 1) {
841 /*
842 * at this point all stripes are finished, so io_unit is at
843 * least in STRIPE_END state
844 */
845 log->in_teardown = 1;
846 /* make sure r5l_write_super_and_discard_space exits */ 830 /* make sure r5l_write_super_and_discard_space exits */
847 mddev = log->rdev->mddev; 831 mddev = log->rdev->mddev;
848 wake_up(&mddev->sb_wait); 832 wake_up(&mddev->sb_wait);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index da583bb43c84..ee7fc3701700 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2423,10 +2423,10 @@ static void raid5_end_read_request(struct bio * bi)
2423 } 2423 }
2424 } 2424 }
2425 rdev_dec_pending(rdev, conf->mddev); 2425 rdev_dec_pending(rdev, conf->mddev);
2426 bio_reset(bi);
2426 clear_bit(R5_LOCKED, &sh->dev[i].flags); 2427 clear_bit(R5_LOCKED, &sh->dev[i].flags);
2427 set_bit(STRIPE_HANDLE, &sh->state); 2428 set_bit(STRIPE_HANDLE, &sh->state);
2428 raid5_release_stripe(sh); 2429 raid5_release_stripe(sh);
2429 bio_reset(bi);
2430} 2430}
2431 2431
2432static void raid5_end_write_request(struct bio *bi) 2432static void raid5_end_write_request(struct bio *bi)
@@ -2498,6 +2498,7 @@ static void raid5_end_write_request(struct bio *bi)
2498 if (sh->batch_head && bi->bi_error && !replacement) 2498 if (sh->batch_head && bi->bi_error && !replacement)
2499 set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); 2499 set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
2500 2500
2501 bio_reset(bi);
2501 if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) 2502 if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
2502 clear_bit(R5_LOCKED, &sh->dev[i].flags); 2503 clear_bit(R5_LOCKED, &sh->dev[i].flags);
2503 set_bit(STRIPE_HANDLE, &sh->state); 2504 set_bit(STRIPE_HANDLE, &sh->state);
@@ -2505,7 +2506,6 @@ static void raid5_end_write_request(struct bio *bi)
2505 2506
2506 if (sh->batch_head && sh != sh->batch_head) 2507 if (sh->batch_head && sh != sh->batch_head)
2507 raid5_release_stripe(sh->batch_head); 2508 raid5_release_stripe(sh->batch_head);
2508 bio_reset(bi);
2509} 2509}
2510 2510
2511static void raid5_build_block(struct stripe_head *sh, int i, int previous) 2511static void raid5_build_block(struct stripe_head *sh, int i, int previous)
@@ -6639,6 +6639,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
6639 } 6639 }
6640 6640
6641 conf->min_nr_stripes = NR_STRIPES; 6641 conf->min_nr_stripes = NR_STRIPES;
6642 if (mddev->reshape_position != MaxSector) {
6643 int stripes = max_t(int,
6644 ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4,
6645 ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4);
6646 conf->min_nr_stripes = max(NR_STRIPES, stripes);
6647 if (conf->min_nr_stripes != NR_STRIPES)
6648 printk(KERN_INFO
6649 "md/raid:%s: force stripe size %d for reshape\n",
6650 mdname(mddev), conf->min_nr_stripes);
6651 }
6642 memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + 6652 memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
6643 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; 6653 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
6644 atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); 6654 atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);