Diffstat (limited to 'drivers/md/raid5.c')
 drivers/md/raid5.c | 79 ++++++++++++++++++++++++++++++++++++++++++++-------------------------------
 1 file changed, 48 insertions(+), 31 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2e38cfac5b1d..9c4f7659f8b1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -103,8 +103,7 @@ static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
 {
 	int i;
-	local_irq_disable();
-	spin_lock(conf->hash_locks);
+	spin_lock_irq(conf->hash_locks);
 	for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
 		spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
 	spin_lock(&conf->device_lock);
@@ -114,9 +113,9 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
 {
 	int i;
 	spin_unlock(&conf->device_lock);
-	for (i = NR_STRIPE_HASH_LOCKS; i; i--)
-		spin_unlock(conf->hash_locks + i - 1);
-	local_irq_enable();
+	for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--)
+		spin_unlock(conf->hash_locks + i);
+	spin_unlock_irq(conf->hash_locks);
 }
 
 /* Find first data disk in a raid6 stripe */
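These two hunks fold the open-coded local_irq_disable()/local_irq_enable() pair into the first lock acquisition: spin_lock_irq() on hash_locks[0] now owns the irq-off state, and the unlock loop stops before index 0 so that the final spin_unlock_irq() on hash_locks[0] re-enables interrupts. A minimal self-contained sketch of the conversion pattern (hypothetical lock, not code from this patch):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

static void demo_enter(void)
{
	/* was: local_irq_disable(); spin_lock(&demo_lock); */
	spin_lock_irq(&demo_lock);	/* disable irqs and lock in one call */
}

static void demo_exit(void)
{
	/* was: spin_unlock(&demo_lock); local_irq_enable(); */
	spin_unlock_irq(&demo_lock);	/* unlock, then re-enable irqs */
}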
@@ -234,11 +233,15 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
 		if (test_bit(R5_InJournal, &sh->dev[i].flags))
 			injournal++;
 	/*
-	 * When quiesce in r5c write back, set STRIPE_HANDLE for stripes with
-	 * data in journal, so they are not released to cached lists
+	 * In the following cases, the stripe cannot be released to cached
+	 * lists. Therefore, we make the stripe write out and set
+	 * STRIPE_HANDLE:
+	 * 1. when quiesce in r5c write back;
+	 * 2. when resync is requested for the stripe.
 	 */
-	if (conf->quiesce && r5c_is_writeback(conf->log) &&
-	    !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0) {
+	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) ||
+	    (conf->quiesce && r5c_is_writeback(conf->log) &&
+	     !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) {
 		if (test_bit(STRIPE_R5C_CACHING, &sh->state))
 			r5c_make_stripe_write_out(sh);
 		set_bit(STRIPE_HANDLE, &sh->state);
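The release condition in do_release_stripe() now covers two cases. Restated as a hypothetical helper (reconstructed from the hunk above, not part of the patch):

static bool must_write_out(struct r5conf *conf, struct stripe_head *sh,
			   int injournal)
{
	/* case 2: resync was requested for this stripe */
	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
		return true;
	/* case 1: quiesce while the r5c cache is in write-back mode */
	return conf->quiesce && r5c_is_writeback(conf->log) &&
	       !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0;
}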
@@ -714,12 +717,11 @@ static bool is_full_stripe_write(struct stripe_head *sh)
 
 static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 {
-	local_irq_disable();
 	if (sh1 > sh2) {
-		spin_lock(&sh2->stripe_lock);
+		spin_lock_irq(&sh2->stripe_lock);
 		spin_lock_nested(&sh1->stripe_lock, 1);
 	} else {
-		spin_lock(&sh1->stripe_lock);
+		spin_lock_irq(&sh1->stripe_lock);
 		spin_lock_nested(&sh2->stripe_lock, 1);
 	}
 }
@@ -727,8 +729,7 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 {
 	spin_unlock(&sh1->stripe_lock);
-	spin_unlock(&sh2->stripe_lock);
-	local_irq_enable();
+	spin_unlock_irq(&sh2->stripe_lock);
 }
 
 /* Only freshly new full stripe normal write stripe can be added to a batch list */
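lock_two_stripes() keeps the ABBA-safe ordering by pointer address; the change is that the first acquisition is now spin_lock_irq(), so it owns the irq-off state, and unlock_two_stripes() re-enables interrupts with its final spin_unlock_irq(). The general idiom as a self-contained sketch (illustrative type and names):

#include <linux/spinlock.h>
#include <linux/lockdep.h>

struct item {
	spinlock_t lock;
};

static void lock_item_pair(struct item *a, struct item *b)
{
	struct item *first = a < b ? a : b;	/* lower address first: no ABBA */
	struct item *second = a < b ? b : a;

	spin_lock_irq(&first->lock);
	/* tell lockdep the second lock of the same class is intentional */
	spin_lock_nested(&second->lock, SINGLE_DEPTH_NESTING);
}

static void unlock_item_pair(struct item *a, struct item *b)
{
	struct item *first = a < b ? a : b;
	struct item *second = a < b ? b : a;

	spin_unlock(&second->lock);
	spin_unlock_irq(&first->lock);	/* last unlock re-enables irqs */
}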
@@ -2312,14 +2313,12 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	struct stripe_head *osh, *nsh;
 	LIST_HEAD(newstripes);
 	struct disk_info *ndisks;
-	int err;
+	int err = 0;
 	struct kmem_cache *sc;
 	int i;
 	int hash, cnt;
 
-	err = md_allow_write(conf->mddev);
-	if (err)
-		return err;
+	md_allow_write(conf->mddev);
 
 	/* Step 1 */
 	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
@@ -2694,7 +2693,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 		bdevname(rdev->bdev, b),
 		mdname(mddev),
 		conf->raid_disks - mddev->degraded);
-	r5c_update_on_rdev_error(mddev);
+	r5c_update_on_rdev_error(mddev, rdev);
 }
 
 /*
@@ -3055,6 +3054,11 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
  * When LOG_CRITICAL, stripes with injournal == 0 will be sent to
  * no_space_stripes list.
  *
+ * 3. during journal failure
+ *    On journal failure, we try to flush all cached data to the raid
+ *    disks based on data in the stripe cache. The array is read-only
+ *    to upper layers, so we skip all pending writes.
+ *
  */
 static inline bool delay_towrite(struct r5conf *conf,
 				 struct r5dev *dev,
@@ -3068,6 +3072,9 @@ static inline bool delay_towrite(struct r5conf *conf,
 	if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) &&
 	    s->injournal > 0)
 		return true;
+	/* case 3 above */
+	if (s->log_failed && s->injournal)
+		return true;
 	return false;
 }
 
@@ -4653,8 +4660,13 @@ static void handle_stripe(struct stripe_head *sh)
 
 	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
 		spin_lock(&sh->stripe_lock);
-		/* Cannot process 'sync' concurrently with 'discard' */
-		if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+		/*
+		 * Cannot process 'sync' concurrently with 'discard'.
+		 * Flush data in r5cache before 'sync'.
+		 */
+		if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
+		    !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) &&
+		    !test_bit(STRIPE_DISCARD, &sh->state) &&
 		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
 			set_bit(STRIPE_SYNCING, &sh->state);
 			clear_bit(STRIPE_INSYNC, &sh->state);
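The extra flag tests defer a resync request while the stripe still carries data in the r5c write-back cache: STRIPE_SYNC_REQUESTED stays set, the cached data is written out first, and a later handle_stripe() pass starts the actual sync. The condition, expressed as a hypothetical predicate (not in the patch):

/* hypothetical: true once the stripe is clean enough to start 'sync' */
static bool can_start_sync(struct stripe_head *sh)
{
	return !test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
	       !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) &&
	       !test_bit(STRIPE_DISCARD, &sh->state);
}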
@@ -4701,10 +4713,15 @@ static void handle_stripe(struct stripe_head *sh)
 	       " to_write=%d failed=%d failed_num=%d,%d\n",
 	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
 	       s.failed_num[0], s.failed_num[1]);
-	/* check if the array has lost more than max_degraded devices and,
+	/*
+	 * check if the array has lost more than max_degraded devices and,
 	 * if so, some requests might need to be failed.
+	 *
+	 * When the journal device has failed (log_failed), we only process
+	 * the stripe if there is data that needs writing to the raid disks.
 	 */
-	if (s.failed > conf->max_degraded || s.log_failed) {
+	if (s.failed > conf->max_degraded ||
+	    (s.log_failed && s.injournal == 0)) {
 		sh->check_state = 0;
 		sh->reconstruct_state = 0;
 		break_stripe_batch_list(sh, 0);
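With a dead journal, a stripe is failed outright only when nothing of it remains in the journal; stripes that still hold journaled data are processed so the data reaches the raid disks (see delay_towrite() case 3 above). As a hypothetical restatement of the check:

static bool stripe_beyond_recovery(struct r5conf *conf,
				   struct stripe_head_state *s)
{
	return s->failed > conf->max_degraded ||
	       (s->log_failed && s->injournal == 0);
}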
@@ -5277,8 +5294,10 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
 	struct stripe_head *sh, *tmp;
 	struct list_head *handle_list = NULL;
 	struct r5worker_group *wg;
-	bool second_try = !r5c_is_writeback(conf->log);
-	bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state);
+	bool second_try = !r5c_is_writeback(conf->log) &&
+		!r5l_log_disk_error(conf);
+	bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) ||
+		r5l_log_disk_error(conf);
 
 again:
 	wg = NULL;
@@ -6313,7 +6332,6 @@ int
 raid5_set_cache_size(struct mddev *mddev, int size)
 {
 	struct r5conf *conf = mddev->private;
-	int err;
 
 	if (size <= 16 || size > 32768)
 		return -EINVAL;
@@ -6325,10 +6343,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
 		;
 	mutex_unlock(&conf->cache_size_mutex);
 
-
-	err = md_allow_write(mddev);
-	if (err)
-		return err;
+	md_allow_write(mddev);
 
 	mutex_lock(&conf->cache_size_mutex);
 	while (size > conf->max_nr_stripes)
@@ -7530,7 +7545,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 		 * neilb: there is no locking about new writes here,
 		 * so this cannot be safe.
 		 */
-		if (atomic_read(&conf->active_stripes)) {
+		if (atomic_read(&conf->active_stripes) ||
+		    atomic_read(&conf->r5c_cached_full_stripes) ||
+		    atomic_read(&conf->r5c_cached_partial_stripes)) {
 			return -EBUSY;
 		}
 		log_exit(conf);
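The strengthened busy check means the journal device can be detached only after every piece of r5c-cached data has reached the raid disks: no active stripes, and no full or partial stripes left on the write-back cache lists. Restated as a hypothetical helper:

static bool r5c_journal_busy(struct r5conf *conf)
{
	return atomic_read(&conf->active_stripes) ||
	       atomic_read(&conf->r5c_cached_full_stripes) ||
	       atomic_read(&conf->r5c_cached_partial_stripes);
}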