diff options
Diffstat (limited to 'drivers/md/raid5.c')
| -rw-r--r-- | drivers/md/raid5.c | 269 | 
1 files changed, 173 insertions, 96 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 94829804ab7f..d29215d966da 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c  | |||
| @@ -156,13 +156,16 @@ static inline int raid6_next_disk(int disk, int raid_disks) | |||
| 156 | static int raid6_idx_to_slot(int idx, struct stripe_head *sh, | 156 | static int raid6_idx_to_slot(int idx, struct stripe_head *sh, | 
| 157 | int *count, int syndrome_disks) | 157 | int *count, int syndrome_disks) | 
| 158 | { | 158 | { | 
| 159 | int slot; | 159 | int slot = *count; | 
| 160 | 160 | ||
| 161 | if (sh->ddf_layout) | ||
| 162 | (*count)++; | ||
| 161 | if (idx == sh->pd_idx) | 163 | if (idx == sh->pd_idx) | 
| 162 | return syndrome_disks; | 164 | return syndrome_disks; | 
| 163 | if (idx == sh->qd_idx) | 165 | if (idx == sh->qd_idx) | 
| 164 | return syndrome_disks + 1; | 166 | return syndrome_disks + 1; | 
| 165 | slot = (*count)++; | 167 | if (!sh->ddf_layout) | 
| 168 | (*count)++; | ||
| 166 | return slot; | 169 | return slot; | 
| 167 | } | 170 | } | 
| 168 | 171 | ||
| @@ -717,7 +720,7 @@ static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh) | |||
| 717 | int i; | 720 | int i; | 
| 718 | 721 | ||
| 719 | for (i = 0; i < disks; i++) | 722 | for (i = 0; i < disks; i++) | 
| 720 | srcs[i] = (void *)raid6_empty_zero_page; | 723 | srcs[i] = NULL; | 
| 721 | 724 | ||
| 722 | count = 0; | 725 | count = 0; | 
| 723 | i = d0_idx; | 726 | i = d0_idx; | 
| @@ -727,9 +730,8 @@ static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh) | |||
| 727 | srcs[slot] = sh->dev[i].page; | 730 | srcs[slot] = sh->dev[i].page; | 
| 728 | i = raid6_next_disk(i, disks); | 731 | i = raid6_next_disk(i, disks); | 
| 729 | } while (i != d0_idx); | 732 | } while (i != d0_idx); | 
| 730 | BUG_ON(count != syndrome_disks); | ||
| 731 | 733 | ||
| 732 | return count; | 734 | return syndrome_disks; | 
| 733 | } | 735 | } | 
| 734 | 736 | ||
| 735 | static struct dma_async_tx_descriptor * | 737 | static struct dma_async_tx_descriptor * | 
| @@ -814,7 +816,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) | |||
| 814 | * slot number conversion for 'faila' and 'failb' | 816 | * slot number conversion for 'faila' and 'failb' | 
| 815 | */ | 817 | */ | 
| 816 | for (i = 0; i < disks ; i++) | 818 | for (i = 0; i < disks ; i++) | 
| 817 | blocks[i] = (void *)raid6_empty_zero_page; | 819 | blocks[i] = NULL; | 
| 818 | count = 0; | 820 | count = 0; | 
| 819 | i = d0_idx; | 821 | i = d0_idx; | 
| 820 | do { | 822 | do { | 
| @@ -828,7 +830,6 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) | |||
| 828 | failb = slot; | 830 | failb = slot; | 
| 829 | i = raid6_next_disk(i, disks); | 831 | i = raid6_next_disk(i, disks); | 
| 830 | } while (i != d0_idx); | 832 | } while (i != d0_idx); | 
| 831 | BUG_ON(count != syndrome_disks); | ||
| 832 | 833 | ||
| 833 | BUG_ON(faila == failb); | 834 | BUG_ON(faila == failb); | 
| 834 | if (failb < faila) | 835 | if (failb < faila) | 
| @@ -845,7 +846,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) | |||
| 845 | init_async_submit(&submit, ASYNC_TX_FENCE, NULL, | 846 | init_async_submit(&submit, ASYNC_TX_FENCE, NULL, | 
| 846 | ops_complete_compute, sh, | 847 | ops_complete_compute, sh, | 
| 847 | to_addr_conv(sh, percpu)); | 848 | to_addr_conv(sh, percpu)); | 
| 848 | return async_gen_syndrome(blocks, 0, count+2, | 849 | return async_gen_syndrome(blocks, 0, syndrome_disks+2, | 
| 849 | STRIPE_SIZE, &submit); | 850 | STRIPE_SIZE, &submit); | 
| 850 | } else { | 851 | } else { | 
| 851 | struct page *dest; | 852 | struct page *dest; | 
| @@ -1139,7 +1140,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu | |||
| 1139 | &sh->ops.zero_sum_result, percpu->spare_page, &submit); | 1140 | &sh->ops.zero_sum_result, percpu->spare_page, &submit); | 
| 1140 | } | 1141 | } | 
| 1141 | 1142 | ||
| 1142 | static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | 1143 | static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | 
| 1143 | { | 1144 | { | 
| 1144 | int overlap_clear = 0, i, disks = sh->disks; | 1145 | int overlap_clear = 0, i, disks = sh->disks; | 
| 1145 | struct dma_async_tx_descriptor *tx = NULL; | 1146 | struct dma_async_tx_descriptor *tx = NULL; | 
| @@ -1204,22 +1205,55 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |||
| 1204 | put_cpu(); | 1205 | put_cpu(); | 
| 1205 | } | 1206 | } | 
| 1206 | 1207 | ||
| 1208 | #ifdef CONFIG_MULTICORE_RAID456 | ||
| 1209 | static void async_run_ops(void *param, async_cookie_t cookie) | ||
| 1210 | { | ||
| 1211 | struct stripe_head *sh = param; | ||
| 1212 | unsigned long ops_request = sh->ops.request; | ||
| 1213 | |||
| 1214 | clear_bit_unlock(STRIPE_OPS_REQ_PENDING, &sh->state); | ||
| 1215 | wake_up(&sh->ops.wait_for_ops); | ||
| 1216 | |||
| 1217 | __raid_run_ops(sh, ops_request); | ||
| 1218 | release_stripe(sh); | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | ||
| 1222 | { | ||
| 1223 | /* since handle_stripe can be called outside of raid5d context | ||
| 1224 | * we need to ensure sh->ops.request is de-staged before another | ||
| 1225 | * request arrives | ||
| 1226 | */ | ||
| 1227 | wait_event(sh->ops.wait_for_ops, | ||
| 1228 | !test_and_set_bit_lock(STRIPE_OPS_REQ_PENDING, &sh->state)); | ||
| 1229 | sh->ops.request = ops_request; | ||
| 1230 | |||
| 1231 | atomic_inc(&sh->count); | ||
| 1232 | async_schedule(async_run_ops, sh); | ||
| 1233 | } | ||
| 1234 | #else | ||
| 1235 | #define raid_run_ops __raid_run_ops | ||
| 1236 | #endif | ||
| 1237 | |||
| 1207 | static int grow_one_stripe(raid5_conf_t *conf) | 1238 | static int grow_one_stripe(raid5_conf_t *conf) | 
| 1208 | { | 1239 | { | 
| 1209 | struct stripe_head *sh; | 1240 | struct stripe_head *sh; | 
| 1241 | int disks = max(conf->raid_disks, conf->previous_raid_disks); | ||
| 1210 | sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL); | 1242 | sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL); | 
| 1211 | if (!sh) | 1243 | if (!sh) | 
| 1212 | return 0; | 1244 | return 0; | 
| 1213 | memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev)); | 1245 | memset(sh, 0, sizeof(*sh) + (disks-1)*sizeof(struct r5dev)); | 
| 1214 | sh->raid_conf = conf; | 1246 | sh->raid_conf = conf; | 
| 1215 | spin_lock_init(&sh->lock); | 1247 | spin_lock_init(&sh->lock); | 
| 1248 | #ifdef CONFIG_MULTICORE_RAID456 | ||
| 1249 | init_waitqueue_head(&sh->ops.wait_for_ops); | ||
| 1250 | #endif | ||
| 1216 | 1251 | ||
| 1217 | if (grow_buffers(sh, conf->raid_disks)) { | 1252 | if (grow_buffers(sh, disks)) { | 
| 1218 | shrink_buffers(sh, conf->raid_disks); | 1253 | shrink_buffers(sh, disks); | 
| 1219 | kmem_cache_free(conf->slab_cache, sh); | 1254 | kmem_cache_free(conf->slab_cache, sh); | 
| 1220 | return 0; | 1255 | return 0; | 
| 1221 | } | 1256 | } | 
| 1222 | sh->disks = conf->raid_disks; | ||
| 1223 | /* we just created an active stripe so... */ | 1257 | /* we just created an active stripe so... */ | 
| 1224 | atomic_set(&sh->count, 1); | 1258 | atomic_set(&sh->count, 1); | 
| 1225 | atomic_inc(&conf->active_stripes); | 1259 | atomic_inc(&conf->active_stripes); | 
| @@ -1231,7 +1265,7 @@ static int grow_one_stripe(raid5_conf_t *conf) | |||
| 1231 | static int grow_stripes(raid5_conf_t *conf, int num) | 1265 | static int grow_stripes(raid5_conf_t *conf, int num) | 
| 1232 | { | 1266 | { | 
| 1233 | struct kmem_cache *sc; | 1267 | struct kmem_cache *sc; | 
| 1234 | int devs = conf->raid_disks; | 1268 | int devs = max(conf->raid_disks, conf->previous_raid_disks); | 
| 1235 | 1269 | ||
| 1236 | sprintf(conf->cache_name[0], | 1270 | sprintf(conf->cache_name[0], | 
| 1237 | "raid%d-%s", conf->level, mdname(conf->mddev)); | 1271 | "raid%d-%s", conf->level, mdname(conf->mddev)); | 
| @@ -1329,6 +1363,9 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
| 1329 | 1363 | ||
| 1330 | nsh->raid_conf = conf; | 1364 | nsh->raid_conf = conf; | 
| 1331 | spin_lock_init(&nsh->lock); | 1365 | spin_lock_init(&nsh->lock); | 
| 1366 | #ifdef CONFIG_MULTICORE_RAID456 | ||
| 1367 | init_waitqueue_head(&nsh->ops.wait_for_ops); | ||
| 1368 | #endif | ||
| 1332 | 1369 | ||
| 1333 | list_add(&nsh->lru, &newstripes); | 1370 | list_add(&nsh->lru, &newstripes); | 
| 1334 | } | 1371 | } | 
| @@ -1899,10 +1936,15 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) | |||
| 1899 | case ALGORITHM_PARITY_N: | 1936 | case ALGORITHM_PARITY_N: | 
| 1900 | break; | 1937 | break; | 
| 1901 | case ALGORITHM_ROTATING_N_CONTINUE: | 1938 | case ALGORITHM_ROTATING_N_CONTINUE: | 
| 1939 | /* Like left_symmetric, but P is before Q */ | ||
| 1902 | if (sh->pd_idx == 0) | 1940 | if (sh->pd_idx == 0) | 
| 1903 | i--; /* P D D D Q */ | 1941 | i--; /* P D D D Q */ | 
| 1904 | else if (i > sh->pd_idx) | 1942 | else { | 
| 1905 | i -= 2; /* D D Q P D */ | 1943 | /* D D Q P D */ | 
| 1944 | if (i < sh->pd_idx) | ||
| 1945 | i += raid_disks; | ||
| 1946 | i -= (sh->pd_idx + 1); | ||
| 1947 | } | ||
| 1906 | break; | 1948 | break; | 
| 1907 | case ALGORITHM_LEFT_ASYMMETRIC_6: | 1949 | case ALGORITHM_LEFT_ASYMMETRIC_6: | 
| 1908 | case ALGORITHM_RIGHT_ASYMMETRIC_6: | 1950 | case ALGORITHM_RIGHT_ASYMMETRIC_6: | 
| @@ -2896,7 +2938,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, | |||
| 2896 | * | 2938 | * | 
| 2897 | */ | 2939 | */ | 
| 2898 | 2940 | ||
| 2899 | static bool handle_stripe5(struct stripe_head *sh) | 2941 | static void handle_stripe5(struct stripe_head *sh) | 
| 2900 | { | 2942 | { | 
| 2901 | raid5_conf_t *conf = sh->raid_conf; | 2943 | raid5_conf_t *conf = sh->raid_conf; | 
| 2902 | int disks = sh->disks, i; | 2944 | int disks = sh->disks, i; | 
| @@ -3167,11 +3209,9 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
| 3167 | ops_run_io(sh, &s); | 3209 | ops_run_io(sh, &s); | 
| 3168 | 3210 | ||
| 3169 | return_io(return_bi); | 3211 | return_io(return_bi); | 
| 3170 | |||
| 3171 | return blocked_rdev == NULL; | ||
| 3172 | } | 3212 | } | 
| 3173 | 3213 | ||
| 3174 | static bool handle_stripe6(struct stripe_head *sh) | 3214 | static void handle_stripe6(struct stripe_head *sh) | 
| 3175 | { | 3215 | { | 
| 3176 | raid5_conf_t *conf = sh->raid_conf; | 3216 | raid5_conf_t *conf = sh->raid_conf; | 
| 3177 | int disks = sh->disks; | 3217 | int disks = sh->disks; | 
| @@ -3455,17 +3495,14 @@ static bool handle_stripe6(struct stripe_head *sh) | |||
| 3455 | ops_run_io(sh, &s); | 3495 | ops_run_io(sh, &s); | 
| 3456 | 3496 | ||
| 3457 | return_io(return_bi); | 3497 | return_io(return_bi); | 
| 3458 | |||
| 3459 | return blocked_rdev == NULL; | ||
| 3460 | } | 3498 | } | 
| 3461 | 3499 | ||
| 3462 | /* returns true if the stripe was handled */ | 3500 | static void handle_stripe(struct stripe_head *sh) | 
| 3463 | static bool handle_stripe(struct stripe_head *sh) | ||
| 3464 | { | 3501 | { | 
| 3465 | if (sh->raid_conf->level == 6) | 3502 | if (sh->raid_conf->level == 6) | 
| 3466 | return handle_stripe6(sh); | 3503 | handle_stripe6(sh); | 
| 3467 | else | 3504 | else | 
| 3468 | return handle_stripe5(sh); | 3505 | handle_stripe5(sh); | 
| 3469 | } | 3506 | } | 
| 3470 | 3507 | ||
| 3471 | static void raid5_activate_delayed(raid5_conf_t *conf) | 3508 | static void raid5_activate_delayed(raid5_conf_t *conf) | 
| @@ -3503,9 +3540,10 @@ static void unplug_slaves(mddev_t *mddev) | |||
| 3503 | { | 3540 | { | 
| 3504 | raid5_conf_t *conf = mddev->private; | 3541 | raid5_conf_t *conf = mddev->private; | 
| 3505 | int i; | 3542 | int i; | 
| 3543 | int devs = max(conf->raid_disks, conf->previous_raid_disks); | ||
| 3506 | 3544 | ||
| 3507 | rcu_read_lock(); | 3545 | rcu_read_lock(); | 
| 3508 | for (i = 0; i < conf->raid_disks; i++) { | 3546 | for (i = 0; i < devs; i++) { | 
| 3509 | mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); | 3547 | mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); | 
| 3510 | if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { | 3548 | if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { | 
| 3511 | struct request_queue *r_queue = bdev_get_queue(rdev->bdev); | 3549 | struct request_queue *r_queue = bdev_get_queue(rdev->bdev); | 
| @@ -4011,6 +4049,8 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
| 4011 | sector_nr = conf->reshape_progress; | 4049 | sector_nr = conf->reshape_progress; | 
| 4012 | sector_div(sector_nr, new_data_disks); | 4050 | sector_div(sector_nr, new_data_disks); | 
| 4013 | if (sector_nr) { | 4051 | if (sector_nr) { | 
| 4052 | mddev->curr_resync_completed = sector_nr; | ||
| 4053 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | ||
| 4014 | *skipped = 1; | 4054 | *skipped = 1; | 
| 4015 | return sector_nr; | 4055 | return sector_nr; | 
| 4016 | } | 4056 | } | 
| @@ -4277,9 +4317,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
| 4277 | clear_bit(STRIPE_INSYNC, &sh->state); | 4317 | clear_bit(STRIPE_INSYNC, &sh->state); | 
| 4278 | spin_unlock(&sh->lock); | 4318 | spin_unlock(&sh->lock); | 
| 4279 | 4319 | ||
| 4280 | /* wait for any blocked device to be handled */ | 4320 | handle_stripe(sh); | 
| 4281 | while (unlikely(!handle_stripe(sh))) | ||
| 4282 | ; | ||
| 4283 | release_stripe(sh); | 4321 | release_stripe(sh); | 
| 4284 | 4322 | ||
| 4285 | return STRIPE_SECTORS; | 4323 | return STRIPE_SECTORS; | 
| @@ -4349,37 +4387,6 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
| 4349 | return handled; | 4387 | return handled; | 
| 4350 | } | 4388 | } | 
| 4351 | 4389 | ||
| 4352 | #ifdef CONFIG_MULTICORE_RAID456 | ||
| 4353 | static void __process_stripe(void *param, async_cookie_t cookie) | ||
| 4354 | { | ||
| 4355 | struct stripe_head *sh = param; | ||
| 4356 | |||
| 4357 | handle_stripe(sh); | ||
| 4358 | release_stripe(sh); | ||
| 4359 | } | ||
| 4360 | |||
| 4361 | static void process_stripe(struct stripe_head *sh, struct list_head *domain) | ||
| 4362 | { | ||
| 4363 | async_schedule_domain(__process_stripe, sh, domain); | ||
| 4364 | } | ||
| 4365 | |||
| 4366 | static void synchronize_stripe_processing(struct list_head *domain) | ||
| 4367 | { | ||
| 4368 | async_synchronize_full_domain(domain); | ||
| 4369 | } | ||
| 4370 | #else | ||
| 4371 | static void process_stripe(struct stripe_head *sh, struct list_head *domain) | ||
| 4372 | { | ||
| 4373 | handle_stripe(sh); | ||
| 4374 | release_stripe(sh); | ||
| 4375 | cond_resched(); | ||
| 4376 | } | ||
| 4377 | |||
| 4378 | static void synchronize_stripe_processing(struct list_head *domain) | ||
| 4379 | { | ||
| 4380 | } | ||
| 4381 | #endif | ||
| 4382 | |||
| 4383 | 4390 | ||
| 4384 | /* | 4391 | /* | 
| 4385 | * This is our raid5 kernel thread. | 4392 | * This is our raid5 kernel thread. | 
| @@ -4393,7 +4400,6 @@ static void raid5d(mddev_t *mddev) | |||
| 4393 | struct stripe_head *sh; | 4400 | struct stripe_head *sh; | 
| 4394 | raid5_conf_t *conf = mddev->private; | 4401 | raid5_conf_t *conf = mddev->private; | 
| 4395 | int handled; | 4402 | int handled; | 
| 4396 | LIST_HEAD(raid_domain); | ||
| 4397 | 4403 | ||
| 4398 | pr_debug("+++ raid5d active\n"); | 4404 | pr_debug("+++ raid5d active\n"); | 
| 4399 | 4405 | ||
| @@ -4430,7 +4436,9 @@ static void raid5d(mddev_t *mddev) | |||
| 4430 | spin_unlock_irq(&conf->device_lock); | 4436 | spin_unlock_irq(&conf->device_lock); | 
| 4431 | 4437 | ||
| 4432 | handled++; | 4438 | handled++; | 
| 4433 | process_stripe(sh, &raid_domain); | 4439 | handle_stripe(sh); | 
| 4440 | release_stripe(sh); | ||
| 4441 | cond_resched(); | ||
| 4434 | 4442 | ||
| 4435 | spin_lock_irq(&conf->device_lock); | 4443 | spin_lock_irq(&conf->device_lock); | 
| 4436 | } | 4444 | } | 
| @@ -4438,7 +4446,6 @@ static void raid5d(mddev_t *mddev) | |||
| 4438 | 4446 | ||
| 4439 | spin_unlock_irq(&conf->device_lock); | 4447 | spin_unlock_irq(&conf->device_lock); | 
| 4440 | 4448 | ||
| 4441 | synchronize_stripe_processing(&raid_domain); | ||
| 4442 | async_tx_issue_pending_all(); | 4449 | async_tx_issue_pending_all(); | 
| 4443 | unplug_slaves(mddev); | 4450 | unplug_slaves(mddev); | 
| 4444 | 4451 | ||
| @@ -4558,13 +4565,9 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
| 4558 | 4565 | ||
| 4559 | if (!sectors) | 4566 | if (!sectors) | 
| 4560 | sectors = mddev->dev_sectors; | 4567 | sectors = mddev->dev_sectors; | 
| 4561 | if (!raid_disks) { | 4568 | if (!raid_disks) | 
| 4562 | /* size is defined by the smallest of previous and new size */ | 4569 | /* size is defined by the smallest of previous and new size */ | 
| 4563 | if (conf->raid_disks < conf->previous_raid_disks) | 4570 | raid_disks = min(conf->raid_disks, conf->previous_raid_disks); | 
| 4564 | raid_disks = conf->raid_disks; | ||
| 4565 | else | ||
| 4566 | raid_disks = conf->previous_raid_disks; | ||
| 4567 | } | ||
| 4568 | 4571 | ||
| 4569 | sectors &= ~((sector_t)mddev->chunk_sectors - 1); | 4572 | sectors &= ~((sector_t)mddev->chunk_sectors - 1); | 
| 4570 | sectors &= ~((sector_t)mddev->new_chunk_sectors - 1); | 4573 | sectors &= ~((sector_t)mddev->new_chunk_sectors - 1); | 
| @@ -4665,7 +4668,7 @@ static int raid5_alloc_percpu(raid5_conf_t *conf) | |||
| 4665 | } | 4668 | } | 
| 4666 | per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page; | 4669 | per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page; | 
| 4667 | } | 4670 | } | 
| 4668 | scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL); | 4671 | scribble = kmalloc(conf->scribble_len, GFP_KERNEL); | 
| 4669 | if (!scribble) { | 4672 | if (!scribble) { | 
| 4670 | err = -ENOMEM; | 4673 | err = -ENOMEM; | 
| 4671 | break; | 4674 | break; | 
| @@ -4686,7 +4689,7 @@ static int raid5_alloc_percpu(raid5_conf_t *conf) | |||
| 4686 | static raid5_conf_t *setup_conf(mddev_t *mddev) | 4689 | static raid5_conf_t *setup_conf(mddev_t *mddev) | 
| 4687 | { | 4690 | { | 
| 4688 | raid5_conf_t *conf; | 4691 | raid5_conf_t *conf; | 
| 4689 | int raid_disk, memory; | 4692 | int raid_disk, memory, max_disks; | 
| 4690 | mdk_rdev_t *rdev; | 4693 | mdk_rdev_t *rdev; | 
| 4691 | struct disk_info *disk; | 4694 | struct disk_info *disk; | 
| 4692 | 4695 | ||
| @@ -4722,15 +4725,28 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
| 4722 | conf = kzalloc(sizeof(raid5_conf_t), GFP_KERNEL); | 4725 | conf = kzalloc(sizeof(raid5_conf_t), GFP_KERNEL); | 
| 4723 | if (conf == NULL) | 4726 | if (conf == NULL) | 
| 4724 | goto abort; | 4727 | goto abort; | 
| 4728 | spin_lock_init(&conf->device_lock); | ||
| 4729 | init_waitqueue_head(&conf->wait_for_stripe); | ||
| 4730 | init_waitqueue_head(&conf->wait_for_overlap); | ||
| 4731 | INIT_LIST_HEAD(&conf->handle_list); | ||
| 4732 | INIT_LIST_HEAD(&conf->hold_list); | ||
| 4733 | INIT_LIST_HEAD(&conf->delayed_list); | ||
| 4734 | INIT_LIST_HEAD(&conf->bitmap_list); | ||
| 4735 | INIT_LIST_HEAD(&conf->inactive_list); | ||
| 4736 | atomic_set(&conf->active_stripes, 0); | ||
| 4737 | atomic_set(&conf->preread_active_stripes, 0); | ||
| 4738 | atomic_set(&conf->active_aligned_reads, 0); | ||
| 4739 | conf->bypass_threshold = BYPASS_THRESHOLD; | ||
| 4725 | 4740 | ||
| 4726 | conf->raid_disks = mddev->raid_disks; | 4741 | conf->raid_disks = mddev->raid_disks; | 
| 4727 | conf->scribble_len = scribble_len(conf->raid_disks); | ||
| 4728 | if (mddev->reshape_position == MaxSector) | 4742 | if (mddev->reshape_position == MaxSector) | 
| 4729 | conf->previous_raid_disks = mddev->raid_disks; | 4743 | conf->previous_raid_disks = mddev->raid_disks; | 
| 4730 | else | 4744 | else | 
| 4731 | conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; | 4745 | conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; | 
| 4746 | max_disks = max(conf->raid_disks, conf->previous_raid_disks); | ||
| 4747 | conf->scribble_len = scribble_len(max_disks); | ||
| 4732 | 4748 | ||
| 4733 | conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info), | 4749 | conf->disks = kzalloc(max_disks * sizeof(struct disk_info), | 
| 4734 | GFP_KERNEL); | 4750 | GFP_KERNEL); | 
| 4735 | if (!conf->disks) | 4751 | if (!conf->disks) | 
| 4736 | goto abort; | 4752 | goto abort; | 
| @@ -4744,24 +4760,11 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
| 4744 | if (raid5_alloc_percpu(conf) != 0) | 4760 | if (raid5_alloc_percpu(conf) != 0) | 
| 4745 | goto abort; | 4761 | goto abort; | 
| 4746 | 4762 | ||
| 4747 | spin_lock_init(&conf->device_lock); | ||
| 4748 | init_waitqueue_head(&conf->wait_for_stripe); | ||
| 4749 | init_waitqueue_head(&conf->wait_for_overlap); | ||
| 4750 | INIT_LIST_HEAD(&conf->handle_list); | ||
| 4751 | INIT_LIST_HEAD(&conf->hold_list); | ||
| 4752 | INIT_LIST_HEAD(&conf->delayed_list); | ||
| 4753 | INIT_LIST_HEAD(&conf->bitmap_list); | ||
| 4754 | INIT_LIST_HEAD(&conf->inactive_list); | ||
| 4755 | atomic_set(&conf->active_stripes, 0); | ||
| 4756 | atomic_set(&conf->preread_active_stripes, 0); | ||
| 4757 | atomic_set(&conf->active_aligned_reads, 0); | ||
| 4758 | conf->bypass_threshold = BYPASS_THRESHOLD; | ||
| 4759 | |||
| 4760 | pr_debug("raid5: run(%s) called.\n", mdname(mddev)); | 4763 | pr_debug("raid5: run(%s) called.\n", mdname(mddev)); | 
| 4761 | 4764 | ||
| 4762 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 4765 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 
| 4763 | raid_disk = rdev->raid_disk; | 4766 | raid_disk = rdev->raid_disk; | 
| 4764 | if (raid_disk >= conf->raid_disks | 4767 | if (raid_disk >= max_disks | 
| 4765 | || raid_disk < 0) | 4768 | || raid_disk < 0) | 
| 4766 | continue; | 4769 | continue; | 
| 4767 | disk = conf->disks + raid_disk; | 4770 | disk = conf->disks + raid_disk; | 
| @@ -4793,7 +4796,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
| 4793 | } | 4796 | } | 
| 4794 | 4797 | ||
| 4795 | memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + | 4798 | memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + | 
| 4796 | conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; | 4799 | max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; | 
| 4797 | if (grow_stripes(conf, conf->max_nr_stripes)) { | 4800 | if (grow_stripes(conf, conf->max_nr_stripes)) { | 
| 4798 | printk(KERN_ERR | 4801 | printk(KERN_ERR | 
| 4799 | "raid5: couldn't allocate %dkB for buffers\n", memory); | 4802 | "raid5: couldn't allocate %dkB for buffers\n", memory); | 
| @@ -4820,11 +4823,40 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
| 4820 | return ERR_PTR(-ENOMEM); | 4823 | return ERR_PTR(-ENOMEM); | 
| 4821 | } | 4824 | } | 
| 4822 | 4825 | ||
| 4826 | |||
| 4827 | static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded) | ||
| 4828 | { | ||
| 4829 | switch (algo) { | ||
| 4830 | case ALGORITHM_PARITY_0: | ||
| 4831 | if (raid_disk < max_degraded) | ||
| 4832 | return 1; | ||
| 4833 | break; | ||
| 4834 | case ALGORITHM_PARITY_N: | ||
| 4835 | if (raid_disk >= raid_disks - max_degraded) | ||
| 4836 | return 1; | ||
| 4837 | break; | ||
| 4838 | case ALGORITHM_PARITY_0_6: | ||
| 4839 | if (raid_disk == 0 || | ||
| 4840 | raid_disk == raid_disks - 1) | ||
| 4841 | return 1; | ||
| 4842 | break; | ||
| 4843 | case ALGORITHM_LEFT_ASYMMETRIC_6: | ||
| 4844 | case ALGORITHM_RIGHT_ASYMMETRIC_6: | ||
| 4845 | case ALGORITHM_LEFT_SYMMETRIC_6: | ||
| 4846 | case ALGORITHM_RIGHT_SYMMETRIC_6: | ||
| 4847 | if (raid_disk == raid_disks - 1) | ||
| 4848 | return 1; | ||
| 4849 | } | ||
| 4850 | return 0; | ||
| 4851 | } | ||
| 4852 | |||
| 4823 | static int run(mddev_t *mddev) | 4853 | static int run(mddev_t *mddev) | 
| 4824 | { | 4854 | { | 
| 4825 | raid5_conf_t *conf; | 4855 | raid5_conf_t *conf; | 
| 4826 | int working_disks = 0, chunk_size; | 4856 | int working_disks = 0, chunk_size; | 
| 4857 | int dirty_parity_disks = 0; | ||
| 4827 | mdk_rdev_t *rdev; | 4858 | mdk_rdev_t *rdev; | 
| 4859 | sector_t reshape_offset = 0; | ||
| 4828 | 4860 | ||
| 4829 | if (mddev->recovery_cp != MaxSector) | 4861 | if (mddev->recovery_cp != MaxSector) | 
| 4830 | printk(KERN_NOTICE "raid5: %s is not clean" | 4862 | printk(KERN_NOTICE "raid5: %s is not clean" | 
| @@ -4858,6 +4890,7 @@ static int run(mddev_t *mddev) | |||
| 4858 | "on a stripe boundary\n"); | 4890 | "on a stripe boundary\n"); | 
| 4859 | return -EINVAL; | 4891 | return -EINVAL; | 
| 4860 | } | 4892 | } | 
| 4893 | reshape_offset = here_new * mddev->new_chunk_sectors; | ||
| 4861 | /* here_new is the stripe we will write to */ | 4894 | /* here_new is the stripe we will write to */ | 
| 4862 | here_old = mddev->reshape_position; | 4895 | here_old = mddev->reshape_position; | 
| 4863 | sector_div(here_old, mddev->chunk_sectors * | 4896 | sector_div(here_old, mddev->chunk_sectors * | 
| @@ -4913,12 +4946,54 @@ static int run(mddev_t *mddev) | |||
| 4913 | /* | 4946 | /* | 
| 4914 | * 0 for a fully functional array, 1 or 2 for a degraded array. | 4947 | * 0 for a fully functional array, 1 or 2 for a degraded array. | 
| 4915 | */ | 4948 | */ | 
| 4916 | list_for_each_entry(rdev, &mddev->disks, same_set) | 4949 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 
| 4917 | if (rdev->raid_disk >= 0 && | 4950 | if (rdev->raid_disk < 0) | 
| 4918 | test_bit(In_sync, &rdev->flags)) | 4951 | continue; | 
| 4952 | if (test_bit(In_sync, &rdev->flags)) | ||
| 4919 | working_disks++; | 4953 | working_disks++; | 
| 4954 | /* This disc is not fully in-sync. However if it | ||
| 4955 | * just stored parity (beyond the recovery_offset), | ||
| 4956 | * when we don't need to be concerned about the | ||
| 4957 | * array being dirty. | ||
| 4958 | * When reshape goes 'backwards', we never have | ||
| 4959 | * partially completed devices, so we only need | ||
| 4960 | * to worry about reshape going forwards. | ||
| 4961 | */ | ||
| 4962 | /* Hack because v0.91 doesn't store recovery_offset properly. */ | ||
| 4963 | if (mddev->major_version == 0 && | ||
| 4964 | mddev->minor_version > 90) | ||
| 4965 | rdev->recovery_offset = reshape_offset; | ||
| 4966 | |||
| 4967 | printk("%d: w=%d pa=%d pr=%d m=%d a=%d r=%d op1=%d op2=%d\n", | ||
| 4968 | rdev->raid_disk, working_disks, conf->prev_algo, | ||
| 4969 | conf->previous_raid_disks, conf->max_degraded, | ||
| 4970 | conf->algorithm, conf->raid_disks, | ||
| 4971 | only_parity(rdev->raid_disk, | ||
| 4972 | conf->prev_algo, | ||
| 4973 | conf->previous_raid_disks, | ||
| 4974 | conf->max_degraded), | ||
| 4975 | only_parity(rdev->raid_disk, | ||
| 4976 | conf->algorithm, | ||
| 4977 | conf->raid_disks, | ||
| 4978 | conf->max_degraded)); | ||
| 4979 | if (rdev->recovery_offset < reshape_offset) { | ||
| 4980 | /* We need to check old and new layout */ | ||
| 4981 | if (!only_parity(rdev->raid_disk, | ||
| 4982 | conf->algorithm, | ||
| 4983 | conf->raid_disks, | ||
| 4984 | conf->max_degraded)) | ||
| 4985 | continue; | ||
| 4986 | } | ||
| 4987 | if (!only_parity(rdev->raid_disk, | ||
| 4988 | conf->prev_algo, | ||
| 4989 | conf->previous_raid_disks, | ||
| 4990 | conf->max_degraded)) | ||
| 4991 | continue; | ||
| 4992 | dirty_parity_disks++; | ||
| 4993 | } | ||
| 4920 | 4994 | ||
| 4921 | mddev->degraded = conf->raid_disks - working_disks; | 4995 | mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks) | 
| 4996 | - working_disks); | ||
| 4922 | 4997 | ||
| 4923 | if (mddev->degraded > conf->max_degraded) { | 4998 | if (mddev->degraded > conf->max_degraded) { | 
| 4924 | printk(KERN_ERR "raid5: not enough operational devices for %s" | 4999 | printk(KERN_ERR "raid5: not enough operational devices for %s" | 
| @@ -4931,7 +5006,7 @@ static int run(mddev_t *mddev) | |||
| 4931 | mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); | 5006 | mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); | 
| 4932 | mddev->resync_max_sectors = mddev->dev_sectors; | 5007 | mddev->resync_max_sectors = mddev->dev_sectors; | 
| 4933 | 5008 | ||
| 4934 | if (mddev->degraded > 0 && | 5009 | if (mddev->degraded > dirty_parity_disks && | 
| 4935 | mddev->recovery_cp != MaxSector) { | 5010 | mddev->recovery_cp != MaxSector) { | 
| 4936 | if (mddev->ok_start_degraded) | 5011 | if (mddev->ok_start_degraded) | 
| 4937 | printk(KERN_WARNING | 5012 | printk(KERN_WARNING | 
| @@ -5357,9 +5432,11 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
| 5357 | !test_bit(Faulty, &rdev->flags)) { | 5432 | !test_bit(Faulty, &rdev->flags)) { | 
| 5358 | if (raid5_add_disk(mddev, rdev) == 0) { | 5433 | if (raid5_add_disk(mddev, rdev) == 0) { | 
| 5359 | char nm[20]; | 5434 | char nm[20]; | 
| 5360 | set_bit(In_sync, &rdev->flags); | 5435 | if (rdev->raid_disk >= conf->previous_raid_disks) | 
| 5436 | set_bit(In_sync, &rdev->flags); | ||
| 5437 | else | ||
| 5438 | rdev->recovery_offset = 0; | ||
| 5361 | added_devices++; | 5439 | added_devices++; | 
| 5362 | rdev->recovery_offset = 0; | ||
| 5363 | sprintf(nm, "rd%d", rdev->raid_disk); | 5440 | sprintf(nm, "rd%d", rdev->raid_disk); | 
| 5364 | if (sysfs_create_link(&mddev->kobj, | 5441 | if (sysfs_create_link(&mddev->kobj, | 
| 5365 | &rdev->kobj, nm)) | 5442 | &rdev->kobj, nm)) | 
