 drivers/md/raid5.c | 132 ++++++++++++++++++++++++++++++++++++++++------------
 drivers/md/raid5.h |   8 +++++
 2 files changed, 110 insertions(+), 30 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5359236a1ec7..7727954cf726 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -642,11 +642,18 @@ static void ops_complete_compute5(void *stripe_head_ref)
 	release_stripe(sh);
 }
 
-static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
+/* return a pointer to the address conversion region of the scribble buffer */
+static addr_conv_t *to_addr_conv(struct stripe_head *sh,
+				 struct raid5_percpu *percpu)
+{
+	return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
+}
+
+static struct dma_async_tx_descriptor *
+ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
-	/* kernel stack size limits the total number of disks */
 	int disks = sh->disks;
-	struct page *xor_srcs[disks];
+	struct page **xor_srcs = percpu->scribble;
 	int target = sh->ops.target;
 	struct r5dev *tgt = &sh->dev[target];
 	struct page *xor_dest = tgt->page;
@@ -666,7 +673,7 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
 	atomic_inc(&sh->count);
 
 	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
-			  ops_complete_compute5, sh, NULL);
+			  ops_complete_compute5, sh, to_addr_conv(sh, percpu));
 	if (unlikely(count == 1))
 		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
 	else
@@ -684,11 +691,11 @@ static void ops_complete_prexor(void *stripe_head_ref)
 }
 
 static struct dma_async_tx_descriptor *
-ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
+	       struct dma_async_tx_descriptor *tx)
 {
-	/* kernel stack size limits the total number of disks */
 	int disks = sh->disks;
-	struct page *xor_srcs[disks];
+	struct page **xor_srcs = percpu->scribble;
 	int count = 0, pd_idx = sh->pd_idx, i;
 	struct async_submit_ctl submit;
 
@@ -706,7 +713,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 	}
 
 	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx,
-			  ops_complete_prexor, sh, NULL);
+			  ops_complete_prexor, sh, to_addr_conv(sh, percpu));
 	tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 
 	return tx;
@@ -775,11 +782,11 @@ static void ops_complete_postxor(void *stripe_head_ref)
 }
 
 static void
-ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_postxor(struct stripe_head *sh, struct raid5_percpu *percpu,
+		struct dma_async_tx_descriptor *tx)
 {
-	/* kernel stack size limits the total number of disks */
 	int disks = sh->disks;
-	struct page *xor_srcs[disks];
+	struct page **xor_srcs = percpu->scribble;
 	struct async_submit_ctl submit;
 	int count = 0, pd_idx = sh->pd_idx, i;
 	struct page *xor_dest;
@@ -819,7 +826,8 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 
 	atomic_inc(&sh->count);
 
-	init_async_submit(&submit, flags, tx, ops_complete_postxor, sh, NULL);
+	init_async_submit(&submit, flags, tx, ops_complete_postxor, sh,
+			  to_addr_conv(sh, percpu));
 	if (unlikely(count == 1))
 		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
 	else
@@ -838,11 +846,10 @@ static void ops_complete_check(void *stripe_head_ref)
 	release_stripe(sh);
 }
 
-static void ops_run_check(struct stripe_head *sh)
+static void ops_run_check(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
-	/* kernel stack size limits the total number of disks */
 	int disks = sh->disks;
-	struct page *xor_srcs[disks];
+	struct page **xor_srcs = percpu->scribble;
 	struct dma_async_tx_descriptor *tx;
 	struct async_submit_ctl submit;
 
@@ -858,7 +865,8 @@ static void ops_run_check(struct stripe_head *sh)
 		xor_srcs[count++] = dev->page;
 	}
 
-	init_async_submit(&submit, 0, NULL, NULL, NULL, NULL);
+	init_async_submit(&submit, 0, NULL, NULL, NULL,
+			  to_addr_conv(sh, percpu));
 	tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
 			   &sh->ops.zero_sum_result, &submit);
 
@@ -871,21 +879,26 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
 {
 	int overlap_clear = 0, i, disks = sh->disks;
 	struct dma_async_tx_descriptor *tx = NULL;
+	raid5_conf_t *conf = sh->raid_conf;
+	struct raid5_percpu *percpu;
+	unsigned long cpu;
 
+	cpu = get_cpu();
+	percpu = per_cpu_ptr(conf->percpu, cpu);
 	if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
 		ops_run_biofill(sh);
 		overlap_clear++;
 	}
 
 	if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
-		tx = ops_run_compute5(sh);
+		tx = ops_run_compute5(sh, percpu);
 		/* terminate the chain if postxor is not set to be run */
 		if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
 			async_tx_ack(tx);
 	}
 
 	if (test_bit(STRIPE_OP_PREXOR, &ops_request))
-		tx = ops_run_prexor(sh, tx);
+		tx = ops_run_prexor(sh, percpu, tx);
 
 	if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
 		tx = ops_run_biodrain(sh, tx);
@@ -893,10 +906,10 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
 	}
 
 	if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
-		ops_run_postxor(sh, tx);
+		ops_run_postxor(sh, percpu, tx);
 
 	if (test_bit(STRIPE_OP_CHECK, &ops_request))
-		ops_run_check(sh);
+		ops_run_check(sh, percpu);
 
 	if (overlap_clear)
 		for (i = disks; i--; ) {
@@ -904,6 +917,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
 			if (test_and_clear_bit(R5_Overlap, &dev->flags))
 				wake_up(&sh->raid_conf->wait_for_overlap);
 		}
+	put_cpu();
 }
 
 static int grow_one_stripe(raid5_conf_t *conf)
@@ -953,6 +967,28 @@ static int grow_stripes(raid5_conf_t *conf, int num)
 	return 0;
 }
 
+/**
+ * scribble_len - return the required size of the scribble region
+ * @num - total number of disks in the array
+ *
+ * The size must be enough to contain:
+ * 1/ a struct page pointer for each device in the array +2
+ * 2/ room to convert each entry in (1) to its corresponding dma
+ *    (dma_map_page()) or page (page_address()) address.
+ *
+ * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
+ * calculate over all devices (not just the data blocks), using zeros in place
+ * of the P and Q blocks.
+ */
+static size_t scribble_len(int num)
+{
+	size_t len;
+
+	len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
+
+	return len;
+}
+
 static int resize_stripes(raid5_conf_t *conf, int newsize)
 {
 	/* Make all the stripes able to hold 'newsize' devices.
@@ -981,6 +1017,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 	struct stripe_head *osh, *nsh;
 	LIST_HEAD(newstripes);
 	struct disk_info *ndisks;
+	unsigned long cpu;
 	int err;
 	struct kmem_cache *sc;
 	int i;
@@ -1046,7 +1083,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 	/* Step 3.
 	 * At this point, we are holding all the stripes so the array
 	 * is completely stalled, so now is a good time to resize
-	 * conf->disks.
+	 * conf->disks and the scribble region
 	 */
 	ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
 	if (ndisks) {
@@ -1057,10 +1094,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 	} else
 		err = -ENOMEM;
 
+	get_online_cpus();
+	conf->scribble_len = scribble_len(newsize);
+	for_each_present_cpu(cpu) {
+		struct raid5_percpu *percpu;
+		void *scribble;
+
+		percpu = per_cpu_ptr(conf->percpu, cpu);
+		scribble = kmalloc(conf->scribble_len, GFP_NOIO);
+
+		if (scribble) {
+			kfree(percpu->scribble);
+			percpu->scribble = scribble;
+		} else {
+			err = -ENOMEM;
+			break;
+		}
+	}
+	put_online_cpus();
+
 	/* Step 4, return new stripes to service */
 	while(!list_empty(&newstripes)) {
 		nsh = list_entry(newstripes.next, struct stripe_head, lru);
 		list_del_init(&nsh->lru);
+
 		for (i=conf->raid_disks; i < newsize; i++)
 			if (nsh->dev[i].page == NULL) {
 				struct page *p = alloc_page(GFP_NOIO);
@@ -4318,6 +4375,7 @@ static void raid5_free_percpu(raid5_conf_t *conf)
 	for_each_possible_cpu(cpu) {
 		percpu = per_cpu_ptr(conf->percpu, cpu);
 		safe_put_page(percpu->spare_page);
+		kfree(percpu->scribble);
 	}
 #ifdef CONFIG_HOTPLUG_CPU
 	unregister_cpu_notifier(&conf->cpu_notify);
@@ -4347,9 +4405,15 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (!percpu->spare_page)
+		if (conf->level == 6 && !percpu->spare_page)
 			percpu->spare_page = alloc_page(GFP_KERNEL);
-		if (!percpu->spare_page) {
+		if (!percpu->scribble)
+			percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+
+		if (!percpu->scribble ||
+		    (conf->level == 6 && !percpu->spare_page)) {
+			safe_put_page(percpu->spare_page);
+			kfree(percpu->scribble);
 			pr_err("%s: failed memory allocation for cpu%ld\n",
 			       __func__, cpu);
 			return NOTIFY_BAD;
@@ -4358,7 +4422,9 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		safe_put_page(percpu->spare_page);
+		kfree(percpu->scribble);
 		percpu->spare_page = NULL;
+		percpu->scribble = NULL;
 		break;
 	default:
 		break;
@@ -4372,12 +4438,9 @@ static int raid5_alloc_percpu(raid5_conf_t *conf)
 	unsigned long cpu;
 	struct page *spare_page;
 	struct raid5_percpu *allcpus;
+	void *scribble;
 	int err;
 
-	/* the only percpu data is the raid6 spare page */
-	if (conf->level != 6)
-		return 0;
-
 	allcpus = alloc_percpu(struct raid5_percpu);
 	if (!allcpus)
 		return -ENOMEM;
@@ -4386,12 +4449,20 @@ static int raid5_alloc_percpu(raid5_conf_t *conf)
 	get_online_cpus();
 	err = 0;
 	for_each_present_cpu(cpu) {
-		spare_page = alloc_page(GFP_KERNEL);
-		if (!spare_page) {
+		if (conf->level == 6) {
+			spare_page = alloc_page(GFP_KERNEL);
+			if (!spare_page) {
+				err = -ENOMEM;
+				break;
+			}
+			per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+		}
+		scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
+		if (!scribble) {
 			err = -ENOMEM;
 			break;
 		}
-		per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+		per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
 	}
 #ifdef CONFIG_HOTPLUG_CPU
 	conf->cpu_notify.notifier_call = raid456_cpu_notify;
@@ -4443,6 +4514,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 		goto abort;
 
 	conf->raid_disks = mddev->raid_disks;
+	conf->scribble_len = scribble_len(conf->raid_disks);
 	if (mddev->reshape_position == MaxSector)
 		conf->previous_raid_disks = mddev->raid_disks;
 	else
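
A note on the scribble layout (editorial, not part of the patch): the region is a single allocation, with the (disks + 2) source-page pointers at the head and the addr_conv_t slots that async_tx uses for dma/page address conversion immediately behind them; to_addr_conv() simply skips past the page-pointer block. Below is a minimal userspace sketch of that arithmetic, assuming addr_conv_t is pointer-sized (the void * stand-in here is an assumption, not the kernel's definition):

/* Userspace sketch of the scribble layout; struct page is kept opaque
 * because only pointers to it are stored in the region. */
#include <stdio.h>
#include <stdlib.h>

typedef void *addr_conv_t;	/* assumed pointer-sized stand-in */
struct page;			/* opaque */

/* mirrors scribble_len(): page pointers plus conversion slots, each
 * sized for disks + 2 entries */
static size_t scribble_len(int num)
{
	return sizeof(struct page *) * (num + 2) +
	       sizeof(addr_conv_t) * (num + 2);
}

/* mirrors to_addr_conv(): the conversion region starts right after the
 * (disks + 2) page pointers at the head of the buffer */
static addr_conv_t *to_addr_conv(void *scribble, int disks)
{
	return (addr_conv_t *)((char *)scribble +
			       sizeof(struct page *) * (disks + 2));
}

int main(void)
{
	int disks = 8;	/* e.g. an 8-device array */
	void *scribble = malloc(scribble_len(disks));

	/* on an LP64 build this prints len=160 conv_offset=80 */
	printf("len=%zu conv_offset=%zu\n", scribble_len(disks),
	       (size_t)((char *)to_addr_conv(scribble, disks) -
			(char *)scribble));
	free(scribble);
	return 0;
}
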
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 07a7a4102f05..e7baabffee86 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -386,7 +386,15 @@ struct raid5_private_data {
 	/* per cpu variables */
 	struct raid5_percpu {
 		struct page	*spare_page; /* Used when checking P/Q in raid6 */
+		void		*scribble;   /* space for constructing buffer
+					      * lists and performing address
+					      * conversions
+					      */
 	} *percpu;
+	size_t			scribble_len; /* size of scribble region must be
+					       * associated with conf to handle
+					       * cpu hotplug while reshaping
+					       */
 #ifdef CONFIG_HOTPLUG_CPU
 	struct notifier_block	cpu_notify;
 #endif
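
Two design points worth noting from the hunks above: scribble_len lives in conf rather than in the percpu struct so that resize_stripes() and the hotplug notifier agree on the allocation size while raid_disks changes, and raid5_run_ops() brackets the whole chain submission with get_cpu()/put_cpu() so the task cannot migrate away while borrowing this cpu's buffer. On reallocation failure, resize_stripes() keeps the old (smaller) buffer so the array remains usable at its previous geometry. A userspace model of that reallocation policy follows; NCPUS and cpus[] are hypothetical stand-ins for the kernel's percpu machinery, not kernel symbols:

/* Userspace model of the scribble reallocation loop in resize_stripes():
 * grow each "cpu's" buffer to the new length, keeping the old buffer
 * intact when an allocation fails. */
#include <stdlib.h>
#include <errno.h>

#define NCPUS 4

struct percpu_model {
	void *scribble;
};

static struct percpu_model cpus[NCPUS];	/* zero-initialized */

static int resize_scribble(size_t new_len)
{
	int err = 0;
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		void *scribble = malloc(new_len); /* kmalloc(GFP_NOIO) in the patch */

		if (scribble) {
			free(cpus[cpu].scribble); /* drop the old region */
			cpus[cpu].scribble = scribble;
		} else {
			err = -ENOMEM;	/* old buffer left in place */
			break;
		}
	}
	return err;
}

int main(void)
{
	return resize_scribble(160) ? EXIT_FAILURE : EXIT_SUCCESS;
}
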