about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--  drivers/md/raid5.c  |  79
1 file changed, 48 insertions(+), 31 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2e38cfac5b1d..9c4f7659f8b1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -103,8 +103,7 @@ static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
 {
 	int i;
-	local_irq_disable();
-	spin_lock(conf->hash_locks);
+	spin_lock_irq(conf->hash_locks);
 	for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
 		spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
 	spin_lock(&conf->device_lock);
@@ -114,9 +113,9 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
 {
 	int i;
 	spin_unlock(&conf->device_lock);
-	for (i = NR_STRIPE_HASH_LOCKS; i; i--)
-		spin_unlock(conf->hash_locks + i - 1);
-	local_irq_enable();
+	for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--)
+		spin_unlock(conf->hash_locks + i);
+	spin_unlock_irq(conf->hash_locks);
 }
 
 /* Find first data disk in a raid6 stripe */
@@ -234,11 +233,15 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
 		if (test_bit(R5_InJournal, &sh->dev[i].flags))
 			injournal++;
 	/*
-	 * When quiesce in r5c write back, set STRIPE_HANDLE for stripes with
-	 * data in journal, so they are not released to cached lists
+	 * In the following cases, the stripe cannot be released to cached
+	 * lists. Therefore, we make the stripe write out and set
+	 * STRIPE_HANDLE:
+	 * 1. when quiesce in r5c write back;
+	 * 2. when resync is requested for the stripe.
 	 */
-	if (conf->quiesce && r5c_is_writeback(conf->log) &&
-	    !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0) {
+	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) ||
+	    (conf->quiesce && r5c_is_writeback(conf->log) &&
+	     !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) {
 		if (test_bit(STRIPE_R5C_CACHING, &sh->state))
 			r5c_make_stripe_write_out(sh);
 		set_bit(STRIPE_HANDLE, &sh->state);
@@ -714,12 +717,11 @@ static bool is_full_stripe_write(struct stripe_head *sh)
 
 static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 {
-	local_irq_disable();
 	if (sh1 > sh2) {
-		spin_lock(&sh2->stripe_lock);
+		spin_lock_irq(&sh2->stripe_lock);
 		spin_lock_nested(&sh1->stripe_lock, 1);
 	} else {
-		spin_lock(&sh1->stripe_lock);
+		spin_lock_irq(&sh1->stripe_lock);
 		spin_lock_nested(&sh2->stripe_lock, 1);
 	}
 }
@@ -727,8 +729,7 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 {
 	spin_unlock(&sh1->stripe_lock);
-	spin_unlock(&sh2->stripe_lock);
-	local_irq_enable();
+	spin_unlock_irq(&sh2->stripe_lock);
 }
 
 /* Only freshly new full stripe normal write stripe can be added to a batch list */
@@ -2312,14 +2313,12 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	struct stripe_head *osh, *nsh;
 	LIST_HEAD(newstripes);
 	struct disk_info *ndisks;
-	int err;
+	int err = 0;
 	struct kmem_cache *sc;
 	int i;
 	int hash, cnt;
 
-	err = md_allow_write(conf->mddev);
-	if (err)
-		return err;
+	md_allow_write(conf->mddev);
 
 	/* Step 1 */
 	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
@@ -2694,7 +2693,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 		bdevname(rdev->bdev, b),
 		mdname(mddev),
 		conf->raid_disks - mddev->degraded);
-	r5c_update_on_rdev_error(mddev);
+	r5c_update_on_rdev_error(mddev, rdev);
 }
 
 /*
@@ -3055,6 +3054,11 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
  *      When LOG_CRITICAL, stripes with injournal == 0 will be sent to
  *      no_space_stripes list.
  *
+ *   3. during journal failure
+ *      In journal failure, we try to flush all cached data to raid disks
+ *      based on data in stripe cache. The array is read-only to upper
+ *      layers, so we would skip all pending writes.
+ *
  */
 static inline bool delay_towrite(struct r5conf *conf,
 				 struct r5dev *dev,
@@ -3068,6 +3072,9 @@ static inline bool delay_towrite(struct r5conf *conf,
 	if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) &&
 	    s->injournal > 0)
 		return true;
+	/* case 3 above */
+	if (s->log_failed && s->injournal)
+		return true;
 	return false;
 }
 
@@ -4653,8 +4660,13 @@ static void handle_stripe(struct stripe_head *sh)
 
 	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
 		spin_lock(&sh->stripe_lock);
-		/* Cannot process 'sync' concurrently with 'discard' */
-		if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+		/*
+		 * Cannot process 'sync' concurrently with 'discard'.
+		 * Flush data in r5cache before 'sync'.
+		 */
+		if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
+		    !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) &&
+		    !test_bit(STRIPE_DISCARD, &sh->state) &&
 		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
 			set_bit(STRIPE_SYNCING, &sh->state);
 			clear_bit(STRIPE_INSYNC, &sh->state);
@@ -4701,10 +4713,15 @@ static void handle_stripe(struct stripe_head *sh)
 	       " to_write=%d failed=%d failed_num=%d,%d\n",
 	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
 	       s.failed_num[0], s.failed_num[1]);
-	/* check if the array has lost more than max_degraded devices and,
+	/*
+	 * check if the array has lost more than max_degraded devices and,
 	 * if so, some requests might need to be failed.
+	 *
+	 * When journal device failed (log_failed), we will only process
+	 * the stripe if there is data need write to raid disks
 	 */
-	if (s.failed > conf->max_degraded || s.log_failed) {
+	if (s.failed > conf->max_degraded ||
+	    (s.log_failed && s.injournal == 0)) {
 		sh->check_state = 0;
 		sh->reconstruct_state = 0;
 		break_stripe_batch_list(sh, 0);
@@ -5277,8 +5294,10 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
 	struct stripe_head *sh, *tmp;
 	struct list_head *handle_list = NULL;
 	struct r5worker_group *wg;
-	bool second_try = !r5c_is_writeback(conf->log);
-	bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state);
+	bool second_try = !r5c_is_writeback(conf->log) &&
+		!r5l_log_disk_error(conf);
+	bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) ||
+		r5l_log_disk_error(conf);
 
 again:
 	wg = NULL;
@@ -6313,7 +6332,6 @@ int
 raid5_set_cache_size(struct mddev *mddev, int size)
 {
 	struct r5conf *conf = mddev->private;
-	int err;
 
 	if (size <= 16 || size > 32768)
 		return -EINVAL;
@@ -6325,10 +6343,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
 		;
 	mutex_unlock(&conf->cache_size_mutex);
 
-
-	err = md_allow_write(mddev);
-	if (err)
-		return err;
+	md_allow_write(mddev);
 
 	mutex_lock(&conf->cache_size_mutex);
 	while (size > conf->max_nr_stripes)
@@ -7530,7 +7545,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 			 * neilb: there is no locking about new writes here,
 			 * so this cannot be safe.
 			 */
-			if (atomic_read(&conf->active_stripes)) {
+			if (atomic_read(&conf->active_stripes) ||
+			    atomic_read(&conf->r5c_cached_full_stripes) ||
+			    atomic_read(&conf->r5c_cached_partial_stripes)) {
 				return -EBUSY;
 			}
 			log_exit(conf);