-rw-r--r--	drivers/md/raid5.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++---------
-rw-r--r--	drivers/md/raid5.h |   8 ++++++++
2 files changed, 110 insertions(+), 30 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5359236a1ec7..7727954cf726 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -642,11 +642,18 @@ static void ops_complete_compute5(void *stripe_head_ref)
 	release_stripe(sh);
 }
 
-static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
+/* return a pointer to the address conversion region of the scribble buffer */
+static addr_conv_t *to_addr_conv(struct stripe_head *sh,
+				 struct raid5_percpu *percpu)
+{
+	return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
+}
+
+static struct dma_async_tx_descriptor *
+ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
-	/* kernel stack size limits the total number of disks */
 	int disks = sh->disks;
-	struct page *xor_srcs[disks];
+	struct page **xor_srcs = percpu->scribble;
 	int target = sh->ops.target;
 	struct r5dev *tgt = &sh->dev[target];
 	struct page *xor_dest = tgt->page;
@@ -666,7 +673,7 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
 	atomic_inc(&sh->count);
 
 	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
-			  ops_complete_compute5, sh, NULL);
+			  ops_complete_compute5, sh, to_addr_conv(sh, percpu));
 	if (unlikely(count == 1))
 		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
 	else
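
The to_addr_conv() helper added above relies on a fixed carve-up of the per-cpu scribble allocation: a page-pointer list first, the address-conversion area immediately behind it. A sketch of the layout (not part of the patch; sizes follow the scribble_len() helper introduced further down):

	/*
	 * percpu->scribble
	 *
	 *   +----------------------------+---------------------------+
	 *   | struct page *[disks + 2]   | addr_conv_t[disks + 2]    |
	 *   +----------------------------+---------------------------+
	 *   ^                            ^
	 *   xor_srcs                     to_addr_conv(sh, percpu)
	 *
	 * to_addr_conv() just skips past the page-pointer array, which
	 * is why its offset is sizeof(struct page *) * (sh->disks + 2).
	 */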
@@ -684,11 +691,11 @@ static void ops_complete_prexor(void *stripe_head_ref)
 }
 
 static struct dma_async_tx_descriptor *
-ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
+	       struct dma_async_tx_descriptor *tx)
 {
-	/* kernel stack size limits the total number of disks */
 	int disks = sh->disks;
-	struct page *xor_srcs[disks];
+	struct page **xor_srcs = percpu->scribble;
 	int count = 0, pd_idx = sh->pd_idx, i;
 	struct async_submit_ctl submit;
 
@@ -706,7 +713,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 	}
 
 	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx,
-			  ops_complete_prexor, sh, NULL);
+			  ops_complete_prexor, sh, to_addr_conv(sh, percpu));
 	tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 
 	return tx;
@@ -775,11 +782,11 @@ static void ops_complete_postxor(void *stripe_head_ref)
 }
 
 static void
-ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_postxor(struct stripe_head *sh, struct raid5_percpu *percpu,
+		struct dma_async_tx_descriptor *tx)
 {
-	/* kernel stack size limits the total number of disks */
 	int disks = sh->disks;
-	struct page *xor_srcs[disks];
+	struct page **xor_srcs = percpu->scribble;
 	struct async_submit_ctl submit;
 	int count = 0, pd_idx = sh->pd_idx, i;
 	struct page *xor_dest;
@@ -819,7 +826,8 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 
 	atomic_inc(&sh->count);
 
-	init_async_submit(&submit, flags, tx, ops_complete_postxor, sh, NULL);
+	init_async_submit(&submit, flags, tx, ops_complete_postxor, sh,
+			  to_addr_conv(sh, percpu));
 	if (unlikely(count == 1))
 		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
 	else
@@ -838,11 +846,10 @@ static void ops_complete_check(void *stripe_head_ref)
 	release_stripe(sh);
 }
 
-static void ops_run_check(struct stripe_head *sh)
+static void ops_run_check(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
-	/* kernel stack size limits the total number of disks */
 	int disks = sh->disks;
-	struct page *xor_srcs[disks];
+	struct page **xor_srcs = percpu->scribble;
 	struct dma_async_tx_descriptor *tx;
 	struct async_submit_ctl submit;
 
@@ -858,7 +865,8 @@ static void ops_run_check(struct stripe_head *sh)
 		xor_srcs[count++] = dev->page;
 	}
 
-	init_async_submit(&submit, 0, NULL, NULL, NULL, NULL);
+	init_async_submit(&submit, 0, NULL, NULL, NULL,
+			  to_addr_conv(sh, percpu));
 	tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
 			   &sh->ops.zero_sum_result, &submit);
 
@@ -871,21 +879,26 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
 {
 	int overlap_clear = 0, i, disks = sh->disks;
 	struct dma_async_tx_descriptor *tx = NULL;
+	raid5_conf_t *conf = sh->raid_conf;
+	struct raid5_percpu *percpu;
+	unsigned long cpu;
 
+	cpu = get_cpu();
+	percpu = per_cpu_ptr(conf->percpu, cpu);
 	if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
 		ops_run_biofill(sh);
 		overlap_clear++;
 	}
 
 	if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
-		tx = ops_run_compute5(sh);
+		tx = ops_run_compute5(sh, percpu);
 		/* terminate the chain if postxor is not set to be run */
 		if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
 			async_tx_ack(tx);
 	}
 
 	if (test_bit(STRIPE_OP_PREXOR, &ops_request))
-		tx = ops_run_prexor(sh, tx);
+		tx = ops_run_prexor(sh, percpu, tx);
 
 	if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
 		tx = ops_run_biodrain(sh, tx);
@@ -893,10 +906,10 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
 	}
 
 	if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
-		ops_run_postxor(sh, tx);
+		ops_run_postxor(sh, percpu, tx);
 
 	if (test_bit(STRIPE_OP_CHECK, &ops_request))
-		ops_run_check(sh);
+		ops_run_check(sh, percpu);
 
 	if (overlap_clear)
 		for (i = disks; i--; ) {
@@ -904,6 +917,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
 			if (test_and_clear_bit(R5_Overlap, &dev->flags))
 				wake_up(&sh->raid_conf->wait_for_overlap);
 		}
+	put_cpu();
 }
 
 static int grow_one_stripe(raid5_conf_t *conf)
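
raid5_run_ops() brackets the whole operation chain with get_cpu()/put_cpu(), so preemption stays disabled and the task cannot migrate while the local CPU's scribble buffer is in use. A minimal sketch of the pattern in isolation, assuming a conf with an initialized percpu pointer as above:

	unsigned long cpu = get_cpu();	/* disables preemption, returns cpu id */
	struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);

	/* build source lists in percpu->scribble and submit the async ops;
	 * the lists only need to live across the submission calls, so the
	 * buffer is free for reuse once put_cpu() runs */
	put_cpu();			/* re-enables preemption */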
@@ -953,6 +967,28 @@ static int grow_stripes(raid5_conf_t *conf, int num)
 	return 0;
 }
 
+/**
+ * scribble_len - return the required size of the scribble region
+ * @num - total number of disks in the array
+ *
+ * The size must be enough to contain:
+ * 1/ a struct page pointer for each device in the array +2
+ * 2/ room to convert each entry in (1) to its corresponding dma
+ * (dma_map_page()) or page (page_address()) address.
+ *
+ * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
+ * calculate over all devices (not just the data blocks), using zeros in place
+ * of the P and Q blocks.
+ */
+static size_t scribble_len(int num)
+{
+	size_t len;
+
+	len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
+
+	return len;
+}
+
 static int resize_stripes(raid5_conf_t *conf, int newsize)
 {
 	/* Make all the stripes able to hold 'newsize' devices.
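
A worked example of the sizing, assuming 64-bit pointers and an addr_conv_t that is likewise 8 bytes (its real size is configuration-dependent, tracking dma_addr_t): a 10-disk array needs

	len = 8 * (10 + 2) + 8 * (10 + 2) = 192 bytes

per CPU, a modest heap allocation that replaces the on-stack xor_srcs[disks] arrays (and their stack-depth risk) removed by this patch.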
@@ -981,6 +1017,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 	struct stripe_head *osh, *nsh;
 	LIST_HEAD(newstripes);
 	struct disk_info *ndisks;
+	unsigned long cpu;
 	int err;
 	struct kmem_cache *sc;
 	int i;
@@ -1046,7 +1083,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 	/* Step 3.
 	 * At this point, we are holding all the stripes so the array
 	 * is completely stalled, so now is a good time to resize
-	 * conf->disks.
+	 * conf->disks and the scribble region
 	 */
 	ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
 	if (ndisks) {
@@ -1057,10 +1094,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 	} else
 		err = -ENOMEM;
 
+	get_online_cpus();
+	conf->scribble_len = scribble_len(newsize);
+	for_each_present_cpu(cpu) {
+		struct raid5_percpu *percpu;
+		void *scribble;
+
+		percpu = per_cpu_ptr(conf->percpu, cpu);
+		scribble = kmalloc(conf->scribble_len, GFP_NOIO);
+
+		if (scribble) {
+			kfree(percpu->scribble);
+			percpu->scribble = scribble;
+		} else {
+			err = -ENOMEM;
+			break;
+		}
+	}
+	put_online_cpus();
+
 	/* Step 4, return new stripes to service */
 	while(!list_empty(&newstripes)) {
 		nsh = list_entry(newstripes.next, struct stripe_head, lru);
 		list_del_init(&nsh->lru);
+
 		for (i=conf->raid_disks; i < newsize; i++)
 			if (nsh->dev[i].page == NULL) {
 				struct page *p = alloc_page(GFP_NOIO);
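
Note the reallocation order in the loop above: each CPU's new scribble buffer is allocated before the old one is freed, so a failed GFP_NOIO allocation leaves that CPU holding its previous, still-valid buffer and the reshape can be backed out cleanly. The shape of the pattern, with hypothetical names:

	void *bigger = kmalloc(new_len, GFP_NOIO);	/* may fail under memory pressure */
	if (bigger) {
		kfree(percpu->scribble);	/* retire the old buffer */
		percpu->scribble = bigger;
	} else
		err = -ENOMEM;			/* old buffer left intact */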
@@ -4318,6 +4375,7 @@ static void raid5_free_percpu(raid5_conf_t *conf)
 	for_each_possible_cpu(cpu) {
 		percpu = per_cpu_ptr(conf->percpu, cpu);
 		safe_put_page(percpu->spare_page);
+		kfree(percpu->scribble);
 	}
 #ifdef CONFIG_HOTPLUG_CPU
 	unregister_cpu_notifier(&conf->cpu_notify);
@@ -4347,9 +4405,15 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (!percpu->spare_page)
+		if (conf->level == 6 && !percpu->spare_page)
 			percpu->spare_page = alloc_page(GFP_KERNEL);
-		if (!percpu->spare_page) {
+		if (!percpu->scribble)
+			percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+
+		if (!percpu->scribble ||
+		    (conf->level == 6 && !percpu->spare_page)) {
+			safe_put_page(percpu->spare_page);
+			kfree(percpu->scribble);
 			pr_err("%s: failed memory allocation for cpu%ld\n",
 			       __func__, cpu);
 			return NOTIFY_BAD;
@@ -4358,7 +4422,9 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		safe_put_page(percpu->spare_page);
+		kfree(percpu->scribble);
 		percpu->spare_page = NULL;
+		percpu->scribble = NULL;
 		break;
 	default:
 		break;
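
The CPU_UP_PREPARE branch above allocates the per-cpu resources as a unit and, on any partial failure, releases whatever did arrive before returning NOTIFY_BAD. No bookkeeping is needed to know which allocation failed, because both cleanup helpers tolerate NULL:

	safe_put_page(percpu->spare_page);	/* raid5.c helper: puts the page only if non-NULL */
	kfree(percpu->scribble);		/* kfree(NULL) is a defined no-op */

CPU_DEAD mirrors this, resetting both pointers to NULL so a later CPU_UP_PREPARE for the same cpu starts from a clean slate.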
@@ -4372,12 +4438,9 @@ static int raid5_alloc_percpu(raid5_conf_t *conf)
 	unsigned long cpu;
 	struct page *spare_page;
 	struct raid5_percpu *allcpus;
+	void *scribble;
 	int err;
 
-	/* the only percpu data is the raid6 spare page */
-	if (conf->level != 6)
-		return 0;
-
 	allcpus = alloc_percpu(struct raid5_percpu);
 	if (!allcpus)
 		return -ENOMEM;
@@ -4386,12 +4449,20 @@ static int raid5_alloc_percpu(raid5_conf_t *conf)
 	get_online_cpus();
 	err = 0;
 	for_each_present_cpu(cpu) {
-		spare_page = alloc_page(GFP_KERNEL);
-		if (!spare_page) {
+		if (conf->level == 6) {
+			spare_page = alloc_page(GFP_KERNEL);
+			if (!spare_page) {
+				err = -ENOMEM;
+				break;
+			}
+			per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+		}
+		scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
+		if (!scribble) {
 			err = -ENOMEM;
 			break;
 		}
-		per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+		per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
 	}
 #ifdef CONFIG_HOTPLUG_CPU
 	conf->cpu_notify.notifier_call = raid456_cpu_notify;
@@ -4443,6 +4514,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 		goto abort;
 
 	conf->raid_disks = mddev->raid_disks;
+	conf->scribble_len = scribble_len(conf->raid_disks);
 	if (mddev->reshape_position == MaxSector)
 		conf->previous_raid_disks = mddev->raid_disks;
 	else
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 07a7a4102f05..e7baabffee86 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -386,7 +386,15 @@ struct raid5_private_data {
 	/* per cpu variables */
 	struct raid5_percpu {
 		struct page	*spare_page; /* Used when checking P/Q in raid6 */
+		void		*scribble;   /* space for constructing buffer
+					      * lists and performing address
+					      * conversions
+					      */
 	} *percpu;
+	size_t			scribble_len; /* size of scribble region must be
+					       * associated with conf to handle
+					       * cpu hotplug while reshaping
+					       */
 #ifdef CONFIG_HOTPLUG_CPU
 	struct notifier_block	cpu_notify;
 #endif