Diffstat (limited to 'drivers/md')
 drivers/md/Kconfig              |  14
 drivers/md/dm-crypt.c           | 109
 drivers/md/dm-exception-store.c |  29
 drivers/md/dm-ioctl.c           |  10
 drivers/md/dm-mpath.c           |  52
 drivers/md/dm-mpath.h           |   2
 drivers/md/dm-raid1.c           |   4
 drivers/md/dm-stripe.c          |   4
 drivers/md/dm-table.c           |  97
 drivers/md/dm.c                 |  40
 drivers/md/dm.h                 |  10
 drivers/md/faulty.c             |   2
 drivers/md/linear.c             | 143
 drivers/md/md.c                 |  96
 drivers/md/multipath.c          |  21
 drivers/md/raid0.c              |  15
 drivers/md/raid1.c              |  14
 drivers/md/raid10.c             |  20
 drivers/md/raid5.c              | 117
 drivers/md/raid6.h              |   9
 20 files changed, 442 insertions(+), 366 deletions(-)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 07d92c11b5d..2281b5098e9 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -30,6 +30,20 @@ config BLK_DEV_MD
 
 	  If unsure, say N.
 
+config MD_AUTODETECT
+	bool "Autodetect RAID arrays during kernel boot"
+	depends on BLK_DEV_MD=y
+	default y
+	---help---
+	  If you say Y here, then the kernel will try to autodetect raid
+	  arrays as part of its boot process.
+
+	  If you don't use raid and say Y, this autodetection can cause
+	  a several-second delay in the boot time due to various
+	  synchronisation steps that are part of this step.
+
+	  If unsure, say Y.
+
 config MD_LINEAR
 	tristate "Linear (append) mode"
 	depends on BLK_DEV_MD
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 13956437bc8..682ef9e6acd 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -333,7 +333,6 @@ static void crypt_convert_init(struct crypt_config *cc,
 	ctx->idx_out = bio_out ? bio_out->bi_idx : 0;
 	ctx->sector = sector + cc->iv_offset;
 	init_completion(&ctx->restart);
-	atomic_set(&ctx->pending, 1);
 }
 
 static int crypt_convert_block(struct crypt_config *cc,
@@ -408,6 +407,8 @@ static int crypt_convert(struct crypt_config *cc,
 {
 	int r;
 
+	atomic_set(&ctx->pending, 1);
+
 	while(ctx->idx_in < ctx->bio_in->bi_vcnt &&
 	      ctx->idx_out < ctx->bio_out->bi_vcnt) {
 
@@ -456,9 +457,11 @@ static void dm_crypt_bio_destructor(struct bio *bio)
 /*
  * Generate a new unfragmented bio with the given size
  * This should never violate the device limitations
- * May return a smaller bio when running out of pages
+ * May return a smaller bio when running out of pages, indicated by
+ * *out_of_pages set to 1.
  */
-static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
+static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
+				      unsigned *out_of_pages)
 {
 	struct crypt_config *cc = io->target->private;
 	struct bio *clone;
@@ -472,11 +475,14 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
 		return NULL;
 
 	clone_init(io, clone);
+	*out_of_pages = 0;
 
 	for (i = 0; i < nr_iovecs; i++) {
 		page = mempool_alloc(cc->page_pool, gfp_mask);
-		if (!page)
+		if (!page) {
+			*out_of_pages = 1;
 			break;
+		}
 
 		/*
 		 * if additional pages cannot be allocated without waiting,
@@ -517,6 +523,27 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
 	}
 }
 
+static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti,
+					  struct bio *bio, sector_t sector)
+{
+	struct crypt_config *cc = ti->private;
+	struct dm_crypt_io *io;
+
+	io = mempool_alloc(cc->io_pool, GFP_NOIO);
+	io->target = ti;
+	io->base_bio = bio;
+	io->sector = sector;
+	io->error = 0;
+	atomic_set(&io->pending, 0);
+
+	return io;
+}
+
+static void crypt_inc_pending(struct dm_crypt_io *io)
+{
+	atomic_inc(&io->pending);
+}
+
 /*
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
@@ -591,7 +618,7 @@ static void kcryptd_io_read(struct dm_crypt_io *io)
 	struct bio *base_bio = io->base_bio;
 	struct bio *clone;
 
-	atomic_inc(&io->pending);
+	crypt_inc_pending(io);
 
 	/*
 	 * The block layer might modify the bvec array, so always
@@ -653,6 +680,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
 		crypt_free_buffer_pages(cc, clone);
 		bio_put(clone);
 		io->error = -EIO;
+		crypt_dec_pending(io);
 		return;
 	}
 
@@ -664,28 +692,34 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
 
 	if (async)
 		kcryptd_queue_io(io);
-	else {
-		atomic_inc(&io->pending);
+	else
 		generic_make_request(clone);
-	}
 }
 
-static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io)
+static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->target->private;
 	struct bio *clone;
+	int crypt_finished;
+	unsigned out_of_pages = 0;
 	unsigned remaining = io->base_bio->bi_size;
 	int r;
 
 	/*
+	 * Prevent io from disappearing until this function completes.
+	 */
+	crypt_inc_pending(io);
+	crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector);
+
+	/*
 	 * The allocated buffers can be smaller than the whole bio,
 	 * so repeat the whole process until all the data can be handled.
 	 */
 	while (remaining) {
-		clone = crypt_alloc_buffer(io, remaining);
+		clone = crypt_alloc_buffer(io, remaining, &out_of_pages);
 		if (unlikely(!clone)) {
 			io->error = -ENOMEM;
-			return;
+			break;
 		}
 
 		io->ctx.bio_out = clone;
@@ -693,37 +727,32 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io)
 
 		remaining -= clone->bi_size;
 
+		crypt_inc_pending(io);
 		r = crypt_convert(cc, &io->ctx);
+		crypt_finished = atomic_dec_and_test(&io->ctx.pending);
 
-		if (atomic_dec_and_test(&io->ctx.pending)) {
-			/* processed, no running async crypto */
+		/* Encryption was already finished, submit io now */
+		if (crypt_finished) {
 			kcryptd_crypt_write_io_submit(io, r, 0);
-			if (unlikely(r < 0))
-				return;
-		} else
-			atomic_inc(&io->pending);
 
-		/* out of memory -> run queues */
-		if (unlikely(remaining)) {
-			/* wait for async crypto then reinitialize pending */
-			wait_event(cc->writeq, !atomic_read(&io->ctx.pending));
-			atomic_set(&io->ctx.pending, 1);
-			congestion_wait(WRITE, HZ/100);
+			/*
+			 * If there was an error, do not try next fragments.
+			 * For async, error is processed in async handler.
+			 */
+			if (unlikely(r < 0))
+				break;
 		}
-	}
-}
 
-static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
-{
-	struct crypt_config *cc = io->target->private;
-
-	/*
-	 * Prevent io from disappearing until this function completes.
-	 */
-	atomic_inc(&io->pending);
+		/*
+		 * Out of memory -> run queues
+		 * But don't wait if split was due to the io size restriction
+		 */
+		if (unlikely(out_of_pages))
+			congestion_wait(WRITE, HZ/100);
 
-	crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector);
-	kcryptd_crypt_write_convert_loop(io);
+		if (unlikely(remaining))
+			wait_event(cc->writeq, !atomic_read(&io->ctx.pending));
+	}
 
 	crypt_dec_pending(io);
 }
@@ -741,7 +770,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 	struct crypt_config *cc = io->target->private;
 	int r = 0;
 
-	atomic_inc(&io->pending);
+	crypt_inc_pending(io);
 
 	crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
 			   io->sector);
@@ -1108,15 +1137,9 @@ static void crypt_dtr(struct dm_target *ti)
 static int crypt_map(struct dm_target *ti, struct bio *bio,
 		     union map_info *map_context)
 {
-	struct crypt_config *cc = ti->private;
 	struct dm_crypt_io *io;
 
-	io = mempool_alloc(cc->io_pool, GFP_NOIO);
-	io->target = ti;
-	io->base_bio = bio;
-	io->sector = bio->bi_sector - ti->begin;
-	io->error = 0;
-	atomic_set(&io->pending, 0);
+	io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin);
 
 	if (bio_data_dir(io->base_bio) == READ)
 		kcryptd_queue_io(io);
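
The common thread in the dm-crypt hunks above is io->pending: with the new crypt_inc_pending()/crypt_dec_pending() helpers, every outstanding piece of work (the caller itself, each clone in flight, each async crypto request) holds a reference, and whoever drops the last one completes the base bio. Taking the caller's reference up front is what lets kcryptd_crypt_write_convert() keep touching io->ctx even if an async completion fires before its loop finishes. Below is a minimal userspace sketch of that pattern, using C11 atomics in place of the kernel's atomic_t and printf/free stand-ins for bio completion; it illustrates the idea, not the kernel code:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct dm_crypt_io {
	atomic_int pending;
	int error;
};

static void crypt_inc_pending(struct dm_crypt_io *io)
{
	atomic_fetch_add(&io->pending, 1);
}

static void crypt_dec_pending(struct dm_crypt_io *io)
{
	/* last reference dropped -> complete and free the io */
	if (atomic_fetch_sub(&io->pending, 1) == 1) {
		printf("io done, error=%d\n", io->error);
		free(io);
	}
}

int main(void)
{
	struct dm_crypt_io *io = malloc(sizeof(*io));

	atomic_init(&io->pending, 1);	/* caller's own reference */
	io->error = 0;

	crypt_inc_pending(io);		/* reference for the async worker */
	/* ... async work would run here and eventually ... */
	crypt_dec_pending(io);		/* worker finished */

	crypt_dec_pending(io);		/* caller drops its reference */
	return 0;
}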
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 41f408068a7..769ab677f8e 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -108,12 +108,12 @@ struct pstore {
 	 * Used to keep track of which metadata area the data in
 	 * 'chunk' refers to.
 	 */
-	uint32_t current_area;
+	chunk_t current_area;
 
 	/*
 	 * The next free chunk for an exception.
 	 */
-	uint32_t next_free;
+	chunk_t next_free;
 
 	/*
 	 * The index of next free exception in the current
@@ -175,7 +175,7 @@ static void do_metadata(struct work_struct *work)
 /*
  * Read or write a chunk aligned and sized block of data from a device.
  */
-static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata)
+static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
 {
 	struct dm_io_region where = {
 		.bdev = ps->snap->cow->bdev,
@@ -209,16 +209,23 @@ static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata)
 }
 
 /*
+ * Convert a metadata area index to a chunk index.
+ */
+static chunk_t area_location(struct pstore *ps, chunk_t area)
+{
+	return 1 + ((ps->exceptions_per_area + 1) * area);
+}
+
+/*
  * Read or write a metadata area.  Remembering to skip the first
  * chunk which holds the header.
  */
-static int area_io(struct pstore *ps, uint32_t area, int rw)
+static int area_io(struct pstore *ps, chunk_t area, int rw)
 {
 	int r;
-	uint32_t chunk;
+	chunk_t chunk;
 
-	/* convert a metadata area index to a chunk index */
-	chunk = 1 + ((ps->exceptions_per_area + 1) * area);
+	chunk = area_location(ps, area);
 
 	r = chunk_io(ps, chunk, rw, 0);
 	if (r)
@@ -228,7 +235,7 @@ static int area_io(struct pstore *ps, uint32_t area, int rw)
 	return 0;
 }
 
-static int zero_area(struct pstore *ps, uint32_t area)
+static int zero_area(struct pstore *ps, chunk_t area)
 {
 	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
 	return area_io(ps, area, WRITE);
@@ -404,7 +411,7 @@ static int insert_exceptions(struct pstore *ps, int *full)
 
 static int read_exceptions(struct pstore *ps)
 {
-	uint32_t area;
+	chunk_t area;
 	int r, full = 1;
 
 	/*
@@ -517,6 +524,7 @@ static int persistent_prepare(struct exception_store *store,
 {
 	struct pstore *ps = get_info(store);
 	uint32_t stride;
+	chunk_t next_free;
 	sector_t size = get_dev_size(store->snap->cow->bdev);
 
 	/* Is there enough room ? */
@@ -530,7 +538,8 @@ static int persistent_prepare(struct exception_store *store,
 	 * into account the location of the metadata chunks.
 	 */
 	stride = (ps->exceptions_per_area + 1);
-	if ((++ps->next_free % stride) == 1)
+	next_free = ++ps->next_free;
+	if (sector_div(next_free, stride) == 1)
 		ps->next_free++;
 
 	atomic_inc(&ps->pending_count);
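
The last hunk is subtler than it looks: now that next_free is a possibly-64-bit chunk_t, the old `% stride` is replaced with sector_div(), which is the safe way to divide a 64-bit sector count on 32-bit kernels, and which modifies its first argument in place while returning the remainder. That is why the value is copied into a local next_free before dividing. Here is a userspace sketch of the contract (the helper below is a stand-in with the same in-place semantics, not the kernel macro):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t chunk_t;

/* like the kernel's sector_div(): sets *a = *a / b, returns *a % b */
static uint32_t sector_div_sketch(chunk_t *a, uint32_t b)
{
	uint32_t rem = (uint32_t)(*a % b);
	*a /= b;
	return rem;
}

int main(void)
{
	chunk_t next_free = 0;
	uint32_t stride = 5;	/* exceptions_per_area + 1 */

	for (int i = 0; i < 12; i++) {
		chunk_t tmp = ++next_free;	/* divide a copy, not the counter */
		if (sector_div_sketch(&tmp, stride) == 1)
			next_free++;		/* skip the metadata chunk */
		printf("allocated chunk %llu\n",
		       (unsigned long long)next_free);
	}
	return 0;
}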
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index b262c0042de..dca401dc70a 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -426,7 +426,7 @@ static int list_devices(struct dm_ioctl *param, size_t param_size)
 			old_nl->next = (uint32_t) ((void *) nl -
 						   (void *) old_nl);
 		disk = dm_disk(hc->md);
-		nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
+		nl->dev = huge_encode_dev(disk_devt(disk));
 		nl->next = 0;
 		strcpy(nl->name, hc->name);
 
@@ -539,7 +539,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 	if (dm_suspended(md))
 		param->flags |= DM_SUSPEND_FLAG;
 
-	param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
+	param->dev = huge_encode_dev(disk_devt(disk));
 
 	/*
 	 * Yes, this will be out of date by the time it gets back
@@ -548,7 +548,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 	 */
 	param->open_count = dm_open_count(md);
 
-	if (disk->policy)
+	if (get_disk_ro(disk))
 		param->flags |= DM_READONLY_FLAG;
 
 	param->event_nr = dm_get_event_nr(md);
@@ -1131,7 +1131,7 @@ static void retrieve_deps(struct dm_table *table,
 	unsigned int count = 0;
 	struct list_head *tmp;
 	size_t len, needed;
-	struct dm_dev *dd;
+	struct dm_dev_internal *dd;
 	struct dm_target_deps *deps;
 
 	deps = get_result_buffer(param, param_size, &len);
@@ -1157,7 +1157,7 @@ static void retrieve_deps(struct dm_table *table,
 	deps->count = count;
 	count = 0;
 	list_for_each_entry (dd, dm_table_get_devices(table), list)
-		deps->dev[count++] = huge_encode_dev(dd->bdev->bd_dev);
+		deps->dev[count++] = huge_encode_dev(dd->dm_dev.bdev->bd_dev);
 
 	param->data_size = param->data_start + needed;
}
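
Both MKDEV(disk->major, disk->first_minor) and its replacement disk_devt(disk) produce a dev_t, an integer packing the major and minor numbers together; the new helper simply lets the block layer own that detail. As a reminder of the packing, here is a small sketch using the kernel's internal 12-bit-major/20-bit-minor split (the helpers are simplified stand-ins, and huge_encode_dev()'s on-the-wire format is a separate encoding not reproduced here):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t dev_t_sketch;

#define MINORBITS	20
#define MINORMASK	((1U << MINORBITS) - 1)

#define MKDEV(ma, mi)	(((ma) << MINORBITS) | (mi))
#define MAJOR(dev)	((unsigned)((dev) >> MINORBITS))
#define MINOR(dev)	((unsigned)((dev) & MINORMASK))

int main(void)
{
	dev_t_sketch dev = MKDEV(253, 3);	/* a typical dm major:minor */

	printf("%u:%u\n", MAJOR(dev), MINOR(dev));
	return 0;
}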
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c2fcf28b4c7..9bf3460c554 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -30,9 +30,11 @@ struct pgpath {
 	struct list_head list;
 
 	struct priority_group *pg;	/* Owning PG */
+	unsigned is_active;		/* Path status */
 	unsigned fail_count;		/* Cumulative failure count */
 
 	struct dm_path path;
+	struct work_struct deactivate_path;
 };
 
 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
@@ -112,6 +114,7 @@ static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
 static void process_queued_ios(struct work_struct *work);
 static void trigger_event(struct work_struct *work);
 static void activate_path(struct work_struct *work);
+static void deactivate_path(struct work_struct *work);
 
 
 /*-----------------------------------------------
@@ -122,8 +125,10 @@ static struct pgpath *alloc_pgpath(void)
 {
 	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
 
-	if (pgpath)
-		pgpath->path.is_active = 1;
+	if (pgpath) {
+		pgpath->is_active = 1;
+		INIT_WORK(&pgpath->deactivate_path, deactivate_path);
+	}
 
 	return pgpath;
 }
@@ -133,6 +138,14 @@ static void free_pgpath(struct pgpath *pgpath)
 	kfree(pgpath);
 }
 
+static void deactivate_path(struct work_struct *work)
+{
+	struct pgpath *pgpath =
+		container_of(work, struct pgpath, deactivate_path);
+
+	blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
+}
+
 static struct priority_group *alloc_priority_group(void)
 {
 	struct priority_group *pg;
@@ -563,12 +576,12 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
 	/* we need at least a path arg */
 	if (as->argc < 1) {
 		ti->error = "no device given";
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	}
 
 	p = alloc_pgpath();
 	if (!p)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	r = dm_get_device(ti, shift(as), ti->begin, ti->len,
 			  dm_table_get_mode(ti->table), &p->path.dev);
@@ -596,7 +609,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
 
  bad:
 	free_pgpath(p);
-	return NULL;
+	return ERR_PTR(r);
 }
 
 static struct priority_group *parse_priority_group(struct arg_set *as,
@@ -614,14 +627,14 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
 
 	if (as->argc < 2) {
 		as->argc = 0;
-		ti->error = "not enough priority group aruments";
-		return NULL;
+		ti->error = "not enough priority group arguments";
+		return ERR_PTR(-EINVAL);
 	}
 
 	pg = alloc_priority_group();
 	if (!pg) {
 		ti->error = "couldn't allocate priority group";
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	}
 	pg->m = m;
 
@@ -654,8 +667,10 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
 		path_args.argv = as->argv;
 
 		pgpath = parse_path(&path_args, &pg->ps, ti);
-		if (!pgpath)
+		if (IS_ERR(pgpath)) {
+			r = PTR_ERR(pgpath);
 			goto bad;
+		}
 
 		pgpath->pg = pg;
 		list_add_tail(&pgpath->list, &pg->pgpaths);
@@ -666,7 +681,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
 
  bad:
 	free_priority_group(pg, ti);
-	return NULL;
+	return ERR_PTR(r);
 }
 
 static int parse_hw_handler(struct arg_set *as, struct multipath *m)
@@ -785,8 +800,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 		struct priority_group *pg;
 
 		pg = parse_priority_group(&as, m);
-		if (!pg) {
-			r = -EINVAL;
+		if (IS_ERR(pg)) {
+			r = PTR_ERR(pg);
 			goto bad;
 		}
 
@@ -834,7 +849,7 @@ static int multipath_map(struct dm_target *ti, struct bio *bio,
 	dm_bio_record(&mpio->details, bio);
 
 	map_context->ptr = mpio;
-	bio->bi_rw |= (1 << BIO_RW_FAILFAST);
+	bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT);
 	r = map_io(m, bio, mpio, 0);
 	if (r < 0 || r == DM_MAPIO_REQUEUE)
 		mempool_free(mpio, m->mpio_pool);
@@ -852,13 +867,13 @@ static int fail_path(struct pgpath *pgpath)
 
 	spin_lock_irqsave(&m->lock, flags);
 
-	if (!pgpath->path.is_active)
+	if (!pgpath->is_active)
 		goto out;
 
 	DMWARN("Failing path %s.", pgpath->path.dev->name);
 
 	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
-	pgpath->path.is_active = 0;
+	pgpath->is_active = 0;
 	pgpath->fail_count++;
 
 	m->nr_valid_paths--;
@@ -870,6 +885,7 @@ static int fail_path(struct pgpath *pgpath)
 		      pgpath->path.dev->name, m->nr_valid_paths);
 
 	queue_work(kmultipathd, &m->trigger_event);
+	queue_work(kmultipathd, &pgpath->deactivate_path);
 
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
@@ -888,7 +904,7 @@ static int reinstate_path(struct pgpath *pgpath)
 
 	spin_lock_irqsave(&m->lock, flags);
 
-	if (pgpath->path.is_active)
+	if (pgpath->is_active)
 		goto out;
 
 	if (!pgpath->pg->ps.type->reinstate_path) {
@@ -902,7 +918,7 @@ static int reinstate_path(struct pgpath *pgpath)
 	if (r)
 		goto out;
 
-	pgpath->path.is_active = 1;
+	pgpath->is_active = 1;
 
 	m->current_pgpath = NULL;
 	if (!m->nr_valid_paths++ && m->queue_size)
@@ -1290,7 +1306,7 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
 
 			list_for_each_entry(p, &pg->pgpaths, list) {
 				DMEMIT("%s %s %u ", p->path.dev->name,
-				       p->path.is_active ? "A" : "F",
+				       p->is_active ? "A" : "F",
 				       p->fail_count);
 				if (pg->ps.type->status)
 					sz += pg->ps.type->status(&pg->ps,
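
The parse_path()/parse_priority_group() changes above swap bare NULL returns for the kernel's ERR_PTR convention, so a constructor can tell its caller whether it failed with -EINVAL or -ENOMEM instead of collapsing every failure into one code. A minimal userspace sketch of the convention (these helpers mirror <linux/err.h> under the usual assumption that no valid pointer falls in the top 4095 bytes of the address space):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* hypothetical parser standing in for parse_path() */
static void *parse_path_sketch(int argc)
{
	if (argc < 1)
		return ERR_PTR(-EINVAL);	/* bad arguments */

	void *p = malloc(64);
	if (!p)
		return ERR_PTR(-ENOMEM);	/* allocation failure */
	return p;
}

int main(void)
{
	void *p = parse_path_sketch(0);

	if (IS_ERR(p))
		printf("parse failed: %ld\n", PTR_ERR(p));
	else
		free(p);
	return 0;
}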
diff --git a/drivers/md/dm-mpath.h b/drivers/md/dm-mpath.h
index c198b856a45..e230f719625 100644
--- a/drivers/md/dm-mpath.h
+++ b/drivers/md/dm-mpath.h
@@ -13,8 +13,6 @@ struct dm_dev;
 
 struct dm_path {
 	struct dm_dev *dev;	/* Read-only */
-	unsigned is_active;	/* Read-only */
-
 	void *pscontext;	/* For path-selector use */
 };
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ff05fe89308..29913e42c4a 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -842,7 +842,9 @@ static int recover(struct mirror_set *ms, struct region *reg)
 	}
 
 	/* hand to kcopyd */
-	set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);
+	if (!errors_handled(ms))
+		set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);
+
 	r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
 			   flags, recovery_complete, reg);
 
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4de90ab3968..b745d8ac625 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -284,8 +284,8 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
 
 	memset(major_minor, 0, sizeof(major_minor));
 	sprintf(major_minor, "%d:%d",
-		bio->bi_bdev->bd_disk->major,
-		bio->bi_bdev->bd_disk->first_minor);
+		MAJOR(disk_devt(bio->bi_bdev->bd_disk)),
+		MINOR(disk_devt(bio->bi_bdev->bd_disk)));
 
 	/*
 	 * Test to see which stripe drive triggered the event
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 61f44140923..a740a6950f5 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -250,7 +250,8 @@ static void free_devices(struct list_head *devices)
 	struct list_head *tmp, *next;
 
 	list_for_each_safe(tmp, next, devices) {
-		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+		struct dm_dev_internal *dd =
+		    list_entry(tmp, struct dm_dev_internal, list);
 		kfree(dd);
 	}
 }
@@ -327,12 +328,12 @@ static int lookup_device(const char *path, dev_t *dev)
 /*
  * See if we've already got a device in the list.
  */
-static struct dm_dev *find_device(struct list_head *l, dev_t dev)
+static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
 {
-	struct dm_dev *dd;
+	struct dm_dev_internal *dd;
 
 	list_for_each_entry (dd, l, list)
-		if (dd->bdev->bd_dev == dev)
+		if (dd->dm_dev.bdev->bd_dev == dev)
 			return dd;
 
 	return NULL;
@@ -341,45 +342,47 @@ static struct dm_dev *find_device(struct list_head *l, dev_t dev)
 /*
  * Open a device so we can use it as a map destination.
  */
-static int open_dev(struct dm_dev *d, dev_t dev, struct mapped_device *md)
+static int open_dev(struct dm_dev_internal *d, dev_t dev,
+		    struct mapped_device *md)
 {
 	static char *_claim_ptr = "I belong to device-mapper";
 	struct block_device *bdev;
 
 	int r;
 
-	BUG_ON(d->bdev);
+	BUG_ON(d->dm_dev.bdev);
 
-	bdev = open_by_devnum(dev, d->mode);
+	bdev = open_by_devnum(dev, d->dm_dev.mode);
 	if (IS_ERR(bdev))
 		return PTR_ERR(bdev);
 	r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md));
 	if (r)
 		blkdev_put(bdev);
 	else
-		d->bdev = bdev;
+		d->dm_dev.bdev = bdev;
 	return r;
 }
 
 /*
  * Close a device that we've been using.
 */
-static void close_dev(struct dm_dev *d, struct mapped_device *md)
+static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
 {
-	if (!d->bdev)
+	if (!d->dm_dev.bdev)
 		return;
 
-	bd_release_from_disk(d->bdev, dm_disk(md));
-	blkdev_put(d->bdev);
-	d->bdev = NULL;
+	bd_release_from_disk(d->dm_dev.bdev, dm_disk(md));
+	blkdev_put(d->dm_dev.bdev);
+	d->dm_dev.bdev = NULL;
 }
 
 /*
 * If possible, this checks an area of a destination device is valid.
 */
-static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len)
+static int check_device_area(struct dm_dev_internal *dd, sector_t start,
+			     sector_t len)
 {
-	sector_t dev_size = dd->bdev->bd_inode->i_size >> SECTOR_SHIFT;
+	sector_t dev_size = dd->dm_dev.bdev->bd_inode->i_size >> SECTOR_SHIFT;
 
 	if (!dev_size)
 		return 1;
@@ -392,16 +395,17 @@ static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len)
  * careful to leave things as they were if we fail to reopen the
  * device.
 */
-static int upgrade_mode(struct dm_dev *dd, int new_mode, struct mapped_device *md)
+static int upgrade_mode(struct dm_dev_internal *dd, int new_mode,
+			struct mapped_device *md)
 {
 	int r;
-	struct dm_dev dd_copy;
-	dev_t dev = dd->bdev->bd_dev;
+	struct dm_dev_internal dd_copy;
+	dev_t dev = dd->dm_dev.bdev->bd_dev;
 
 	dd_copy = *dd;
 
-	dd->mode |= new_mode;
-	dd->bdev = NULL;
+	dd->dm_dev.mode |= new_mode;
+	dd->dm_dev.bdev = NULL;
 	r = open_dev(dd, dev, md);
 	if (!r)
 		close_dev(&dd_copy, md);
@@ -421,7 +425,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
 {
 	int r;
 	dev_t uninitialized_var(dev);
-	struct dm_dev *dd;
+	struct dm_dev_internal *dd;
 	unsigned int major, minor;
 
 	BUG_ON(!t);
@@ -443,20 +447,20 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
 		if (!dd)
 			return -ENOMEM;
 
-		dd->mode = mode;
-		dd->bdev = NULL;
+		dd->dm_dev.mode = mode;
+		dd->dm_dev.bdev = NULL;
 
 		if ((r = open_dev(dd, dev, t->md))) {
 			kfree(dd);
 			return r;
 		}
 
-		format_dev_t(dd->name, dev);
+		format_dev_t(dd->dm_dev.name, dev);
 
 		atomic_set(&dd->count, 0);
 		list_add(&dd->list, &t->devices);
 
-	} else if (dd->mode != (mode | dd->mode)) {
+	} else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) {
 		r = upgrade_mode(dd, mode, t->md);
 		if (r)
 			return r;
@@ -465,11 +469,11 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
 
 	if (!check_device_area(dd, start, len)) {
 		DMWARN("device %s too small for target", path);
-		dm_put_device(ti, dd);
+		dm_put_device(ti, &dd->dm_dev);
 		return -EINVAL;
 	}
 
-	*result = dd;
+	*result = &dd->dm_dev;
 
 	return 0;
 }
@@ -478,6 +482,13 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 	struct io_restrictions *rs = &ti->limits;
+	char b[BDEVNAME_SIZE];
+
+	if (unlikely(!q)) {
+		DMWARN("%s: Cannot set limits for nonexistent device %s",
+		       dm_device_name(ti->table->md), bdevname(bdev, b));
+		return;
+	}
 
 	/*
	 * Combine the device limits low.
@@ -540,8 +551,11 @@ int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
 /*
 * Decrement a devices use count and remove it if necessary.
 */
-void dm_put_device(struct dm_target *ti, struct dm_dev *dd)
+void dm_put_device(struct dm_target *ti, struct dm_dev *d)
 {
+	struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal,
+						  dm_dev);
+
 	if (atomic_dec_and_test(&dd->count)) {
 		close_dev(dd, ti->table->md);
 		list_del(&dd->list);
@@ -937,13 +951,20 @@ int dm_table_resume_targets(struct dm_table *t)
 
 int dm_table_any_congested(struct dm_table *t, int bdi_bits)
 {
-	struct dm_dev *dd;
+	struct dm_dev_internal *dd;
 	struct list_head *devices = dm_table_get_devices(t);
 	int r = 0;
 
 	list_for_each_entry(dd, devices, list) {
-		struct request_queue *q = bdev_get_queue(dd->bdev);
-		r |= bdi_congested(&q->backing_dev_info, bdi_bits);
+		struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
+		char b[BDEVNAME_SIZE];
+
+		if (likely(q))
+			r |= bdi_congested(&q->backing_dev_info, bdi_bits);
+		else
+			DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
+				     dm_device_name(t->md),
+				     bdevname(dd->dm_dev.bdev, b));
 	}
 
 	return r;
@@ -951,13 +972,19 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
 
 void dm_table_unplug_all(struct dm_table *t)
 {
-	struct dm_dev *dd;
+	struct dm_dev_internal *dd;
 	struct list_head *devices = dm_table_get_devices(t);
 
 	list_for_each_entry(dd, devices, list) {
-		struct request_queue *q = bdev_get_queue(dd->bdev);
-
-		blk_unplug(q);
+		struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
+		char b[BDEVNAME_SIZE];
+
+		if (likely(q))
+			blk_unplug(q);
+		else
+			DMWARN_LIMIT("%s: Cannot unplug nonexistent device %s",
+				     dm_device_name(t->md),
+				     bdevname(dd->dm_dev.bdev, b));
 	}
 }
 
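
The dm-table churn above all serves one refactor: struct dm_dev is now embedded inside a private struct dm_dev_internal (see the dm.h hunk below), targets only ever receive &dd->dm_dev, and dm_put_device() recovers the wrapper with container_of(). The payoff is that the reference count and list linkage become invisible to targets while every existing dm_dev user keeps compiling. A compilable userspace sketch of the pattern, with a toy reference count and a field layout mirroring the new dm.h:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct dm_dev {
	int mode;
	char name[16];
};

struct dm_dev_internal {
	int count;		/* private reference count */
	struct dm_dev dm_dev;	/* public part handed to targets */
};

static void dm_put_device_sketch(struct dm_dev *d)
{
	/* recover the private wrapper from the public pointer */
	struct dm_dev_internal *dd =
		container_of(d, struct dm_dev_internal, dm_dev);

	if (--dd->count == 0)
		printf("closing %s\n", dd->dm_dev.name);
}

int main(void)
{
	struct dm_dev_internal dd = { .count = 1,
				      .dm_dev = { .mode = 0, .name = "sda" } };

	/* a target only ever sees &dd.dm_dev */
	dm_put_device_sketch(&dd.dm_dev);
	return 0;
}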
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ace998ce59f..327de03a5bd 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -377,13 +377,14 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
+	int cpu;
 
 	io->start_time = jiffies;
 
-	preempt_disable();
-	disk_round_stats(dm_disk(md));
-	preempt_enable();
-	dm_disk(md)->in_flight = atomic_inc_return(&md->pending);
+	cpu = part_stat_lock();
+	part_round_stats(cpu, &dm_disk(md)->part0);
+	part_stat_unlock();
+	dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
 }
 
 static int end_io_acct(struct dm_io *io)
@@ -391,15 +392,16 @@ static int end_io_acct(struct dm_io *io)
 	struct mapped_device *md = io->md;
 	struct bio *bio = io->bio;
 	unsigned long duration = jiffies - io->start_time;
-	int pending;
+	int pending, cpu;
 	int rw = bio_data_dir(bio);
 
-	preempt_disable();
-	disk_round_stats(dm_disk(md));
-	preempt_enable();
-	dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending);
+	cpu = part_stat_lock();
+	part_round_stats(cpu, &dm_disk(md)->part0);
+	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
+	part_stat_unlock();
 
-	disk_stat_add(dm_disk(md), ticks[rw], duration);
+	dm_disk(md)->part0.in_flight = pending =
+		atomic_dec_return(&md->pending);
 
 	return !pending;
 }
@@ -885,6 +887,7 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	int r = -EIO;
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
+	int cpu;
 
 	/*
	 * There is no use in forwarding any barrier request since we can't
@@ -897,8 +900,10 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 
 	down_read(&md->io_lock);
 
-	disk_stat_inc(dm_disk(md), ios[rw]);
-	disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
+	part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
+	part_stat_unlock();
 
 	/*
	 * If we're suspended we have to queue
@@ -1146,7 +1151,7 @@ static void unlock_fs(struct mapped_device *md);
 
 static void free_dev(struct mapped_device *md)
 {
-	int minor = md->disk->first_minor;
+	int minor = MINOR(disk_devt(md->disk));
 
 	if (md->suspended_bdev) {
 		unlock_fs(md);
@@ -1182,7 +1187,7 @@ static void event_callback(void *context)
 	list_splice_init(&md->uevent_list, &uevents);
 	spin_unlock_irqrestore(&md->uevent_lock, flags);
 
-	dm_send_uevents(&uevents, &md->disk->dev.kobj);
+	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
 
 	atomic_inc(&md->event_nr);
 	wake_up(&md->eventq);
@@ -1267,7 +1272,7 @@ static struct mapped_device *dm_find_md(dev_t dev)
 
 	md = idr_find(&_minor_idr, minor);
 	if (md && (md == MINOR_ALLOCED ||
-		   (dm_disk(md)->first_minor != minor) ||
+		   (MINOR(disk_devt(dm_disk(md))) != minor) ||
 		   test_bit(DMF_FREEING, &md->flags))) {
 		md = NULL;
 		goto out;
@@ -1318,7 +1323,8 @@ void dm_put(struct mapped_device *md)
 
 	if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
 		map = dm_get_table(md);
-		idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor);
+		idr_replace(&_minor_idr, MINOR_ALLOCED,
+			    MINOR(disk_devt(dm_disk(md))));
 		set_bit(DMF_FREEING, &md->flags);
 		spin_unlock(&_minor_lock);
 		if (!dm_suspended(md)) {
@@ -1638,7 +1644,7 @@ out:
 *---------------------------------------------------------------*/
 void dm_kobject_uevent(struct mapped_device *md)
 {
-	kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE);
+	kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE);
 }
 
 uint32_t dm_next_uevent_seq(struct mapped_device *md)
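
The disk_stat_* to part_stat_* conversion in start_io_acct(), end_io_acct() and dm_request() (and in linear.c below) is part of a block-layer-wide move to per-CPU disk statistics: part_stat_lock() pins the caller to a CPU, each CPU bumps only its own counters, and readers sum across CPUs, so the I/O hot path no longer contends on shared counters. A rough userspace sketch of the idea, where a plain array indexed by a caller-supplied CPU id stands in for the kernel's per-CPU machinery:

#include <stdio.h>

#define NR_CPUS 4

struct disk_stats {
	unsigned long ios[2];	/* [read, write], one slot per CPU */
};

static struct disk_stats per_cpu_stats[NR_CPUS];

static void stat_inc(int cpu, int rw)
{
	per_cpu_stats[cpu].ios[rw]++;	/* no lock: the slot is CPU-private */
}

static unsigned long stat_sum(int rw)
{
	unsigned long sum = 0;

	/* readers pay the cost instead: sum over all CPUs */
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		sum += per_cpu_stats[cpu].ios[rw];
	return sum;
}

int main(void)
{
	stat_inc(0, 1);
	stat_inc(2, 1);
	printf("writes: %lu\n", stat_sum(1));
	return 0;
}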
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 1e59a0b0a78..cd189da2b2f 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -25,13 +25,10 @@
 /*
 * List of devices that a metadevice uses and should open/close.
 */
-struct dm_dev {
+struct dm_dev_internal {
 	struct list_head list;
-
 	atomic_t count;
-	int mode;
-	struct block_device *bdev;
-	char name[16];
+	struct dm_dev dm_dev;
 };
 
 struct dm_table;
@@ -49,7 +46,6 @@ void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);
 int dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
-void dm_table_unplug_all(struct dm_table *t);
 
 /*
 * To check the return value from dm_table_find_target().
@@ -93,8 +89,6 @@ void dm_linear_exit(void);
 int dm_stripe_init(void);
 void dm_stripe_exit(void);
 
-void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
-union map_info *dm_get_mapinfo(struct bio *bio);
 int dm_open_count(struct mapped_device *md);
 int dm_lock_for_deletion(struct mapped_device *md);
 
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 268547dbfbd..f26c1f9a475 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -287,6 +287,8 @@ static int run(mddev_t *mddev)
 	int i;
 
 	conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL);
+	if (!conf)
+		return -ENOMEM;
 
 	for (i=0; i<Modes; i++) {
 		atomic_set(&conf->counters[i], 0);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index b1eebf88c20..190147c79e7 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -16,16 +16,8 @@
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
 
-#include <linux/module.h>
-
-#include <linux/raid/md.h>
-#include <linux/slab.h>
 #include <linux/raid/linear.h>
 
-#define MAJOR_NR MD_MAJOR
-#define MD_DRIVER
-#define MD_PERSONALITY
-
 /*
 * find which device holds a particular offset
 */
@@ -33,16 +25,15 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
 {
 	dev_info_t *hash;
 	linear_conf_t *conf = mddev_to_conf(mddev);
-	sector_t block = sector >> 1;
 
 	/*
	 * sector_div(a,b) returns the remainer and sets a to a/b
	 */
-	block >>= conf->preshift;
-	(void)sector_div(block, conf->hash_spacing);
-	hash = conf->hash_table[block];
+	sector >>= conf->sector_shift;
+	(void)sector_div(sector, conf->spacing);
+	hash = conf->hash_table[sector];
 
-	while ((sector>>1) >= (hash->size + hash->offset))
+	while (sector >= hash->num_sectors + hash->start_sector)
 		hash++;
 	return hash;
 }
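
The rewritten which_dev() above is easier to follow once the units are explicit: everything is now in raw sectors, the hash table has one slot per `spacing` sectors pointing at the first device that could cover that range, and a short forward walk finishes the lookup. A simplified userspace sketch with invented device geometry (plain division stands in for the pre-shift plus sector_div() dance):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

struct dev_info {
	sector_t start_sector;
	sector_t num_sectors;
};

int main(void)
{
	struct dev_info disks[] = {
		{ .start_sector = 0,   .num_sectors = 100 },
		{ .start_sector = 100, .num_sectors = 50  },
		{ .start_sector = 150, .num_sectors = 200 },
	};
	sector_t spacing = 100;
	/* one slot per 'spacing' sectors, pointing at the first device
	 * whose range could overlap that window */
	struct dev_info *hash_table[] = { &disks[0], &disks[1], &disks[2],
					  &disks[2] };
	sector_t sector = 120;

	struct dev_info *hash = hash_table[sector / spacing];
	while (sector >= hash->start_sector + hash->num_sectors)
		hash++;	/* walk forward within the contiguous disks[] array */
	printf("sector %llu -> device starting at %llu\n",
	       (unsigned long long)sector,
	       (unsigned long long)hash->start_sector);
	return 0;
}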
@@ -65,7 +56,7 @@ static int linear_mergeable_bvec(struct request_queue *q,
 	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
 
 	dev0 = which_dev(mddev, sector);
-	maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1));
+	maxsectors = dev0->num_sectors - (sector - dev0->start_sector);
 
 	if (maxsectors < bio_sectors)
 		maxsectors = 0;
@@ -112,8 +103,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 	dev_info_t **table;
 	mdk_rdev_t *rdev;
 	int i, nb_zone, cnt;
-	sector_t min_spacing;
-	sector_t curr_offset;
+	sector_t min_sectors;
+	sector_t curr_sector;
 	struct list_head *tmp;
 
 	conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t),
@@ -145,7 +136,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
-		disk->size = rdev->size;
+		disk->num_sectors = rdev->size * 2;
 		conf->array_sectors += rdev->size * 2;
 
 		cnt++;
@@ -155,34 +146,34 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		goto out;
 	}
 
-	min_spacing = conf->array_sectors / 2;
-	sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *));
+	min_sectors = conf->array_sectors;
+	sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *));
 
-	/* min_spacing is the minimum spacing that will fit the hash
+	/* min_sectors is the minimum spacing that will fit the hash
	 * table in one PAGE. This may be much smaller than needed.
	 * We find the smallest non-terminal set of consecutive devices
-	 * that is larger than min_spacing as use the size of that as
+	 * that is larger than min_sectors and use the size of that as
	 * the actual spacing
	 */
-	conf->hash_spacing = conf->array_sectors / 2;
+	conf->spacing = conf->array_sectors;
 	for (i=0; i < cnt-1 ; i++) {
-		sector_t sz = 0;
+		sector_t tmp = 0;
 		int j;
-		for (j = i; j < cnt - 1 && sz < min_spacing; j++)
-			sz += conf->disks[j].size;
-		if (sz >= min_spacing && sz < conf->hash_spacing)
-			conf->hash_spacing = sz;
+		for (j = i; j < cnt - 1 && tmp < min_sectors; j++)
+			tmp += conf->disks[j].num_sectors;
+		if (tmp >= min_sectors && tmp < conf->spacing)
+			conf->spacing = tmp;
 	}
 
-	/* hash_spacing may be too large for sector_div to work with,
+	/* spacing may be too large for sector_div to work with,
	 * so we might need to pre-shift
	 */
-	conf->preshift = 0;
+	conf->sector_shift = 0;
 	if (sizeof(sector_t) > sizeof(u32)) {
-		sector_t space = conf->hash_spacing;
+		sector_t space = conf->spacing;
 		while (space > (sector_t)(~(u32)0)) {
 			space >>= 1;
-			conf->preshift++;
+			conf->sector_shift++;
 		}
 	}
 	/*
@@ -194,9 +185,9 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		unsigned round;
 		unsigned long base;
 
-		sz = conf->array_sectors >> (conf->preshift + 1);
+		sz = conf->array_sectors >> conf->sector_shift;
 		sz += 1; /* force round-up */
-		base = conf->hash_spacing >> conf->preshift;
+		base = conf->spacing >> conf->sector_shift;
 		round = sector_div(sz, base);
 		nb_zone = sz + (round ? 1 : 0);
 	}
@@ -211,32 +202,31 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
	 * Here we generate the linear hash table
	 * First calculate the device offsets.
	 */
-	conf->disks[0].offset = 0;
+	conf->disks[0].start_sector = 0;
 	for (i = 1; i < raid_disks; i++)
-		conf->disks[i].offset =
-			conf->disks[i-1].offset +
-			conf->disks[i-1].size;
+		conf->disks[i].start_sector =
+			conf->disks[i-1].start_sector +
+			conf->disks[i-1].num_sectors;
 
 	table = conf->hash_table;
-	curr_offset = 0;
 	i = 0;
-	for (curr_offset = 0;
-	     curr_offset < conf->array_sectors / 2;
-	     curr_offset += conf->hash_spacing) {
+	for (curr_sector = 0;
+	     curr_sector < conf->array_sectors;
+	     curr_sector += conf->spacing) {
 
 		while (i < raid_disks-1 &&
-		       curr_offset >= conf->disks[i+1].offset)
+		       curr_sector >= conf->disks[i+1].start_sector)
 			i++;
 
 		*table ++ = conf->disks + i;
 	}
 
-	if (conf->preshift) {
-		conf->hash_spacing >>= conf->preshift;
-		/* round hash_spacing up so that when we divide by it,
+	if (conf->sector_shift) {
+		conf->spacing >>= conf->sector_shift;
+		/* round spacing up so that when we divide by it,
		 * we err on the side of "too-low", which is safest.
		 */
-		conf->hash_spacing++;
+		conf->spacing++;
 	}
 
 	BUG_ON(table - conf->hash_table > nb_zone);
@@ -317,40 +307,47 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 	const int rw = bio_data_dir(bio);
 	mddev_t *mddev = q->queuedata;
 	dev_info_t *tmp_dev;
-	sector_t block;
+	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	}
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bio));
+	part_stat_unlock();
 
330 | tmp_dev = which_dev(mddev, bio->bi_sector); | 323 | tmp_dev = which_dev(mddev, bio->bi_sector); |
331 | block = bio->bi_sector >> 1; | ||
332 | 324 | ||
333 | if (unlikely(block >= (tmp_dev->size + tmp_dev->offset) | 325 | if (unlikely(bio->bi_sector >= (tmp_dev->num_sectors + |
334 | || block < tmp_dev->offset)) { | 326 | tmp_dev->start_sector) |
327 | || (bio->bi_sector < | ||
328 | tmp_dev->start_sector))) { | ||
335 | char b[BDEVNAME_SIZE]; | 329 | char b[BDEVNAME_SIZE]; |
336 | 330 | ||
337 | printk("linear_make_request: Block %llu out of bounds on " | 331 | printk("linear_make_request: Sector %llu out of bounds on " |
338 | "dev %s size %llu offset %llu\n", | 332 | "dev %s: %llu sectors, offset %llu\n", |
339 | (unsigned long long)block, | 333 | (unsigned long long)bio->bi_sector, |
340 | bdevname(tmp_dev->rdev->bdev, b), | 334 | bdevname(tmp_dev->rdev->bdev, b), |
341 | (unsigned long long)tmp_dev->size, | 335 | (unsigned long long)tmp_dev->num_sectors, |
342 | (unsigned long long)tmp_dev->offset); | 336 | (unsigned long long)tmp_dev->start_sector); |
343 | bio_io_error(bio); | 337 | bio_io_error(bio); |
344 | return 0; | 338 | return 0; |
345 | } | 339 | } |
346 | if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > | 340 | if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > |
347 | (tmp_dev->offset + tmp_dev->size)<<1)) { | 341 | tmp_dev->start_sector + tmp_dev->num_sectors)) { |
348 | /* This bio crosses a device boundary, so we have to | 342 | /* This bio crosses a device boundary, so we have to |
349 | * split it. | 343 | * split it. |
350 | */ | 344 | */ |
351 | struct bio_pair *bp; | 345 | struct bio_pair *bp; |
352 | bp = bio_split(bio, bio_split_pool, | 346 | |
353 | ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); | 347 | bp = bio_split(bio, |
348 | tmp_dev->start_sector + tmp_dev->num_sectors | ||
349 | - bio->bi_sector); | ||
350 | |||
354 | if (linear_make_request(q, &bp->bio1)) | 351 | if (linear_make_request(q, &bp->bio1)) |
355 | generic_make_request(&bp->bio1); | 352 | generic_make_request(&bp->bio1); |
356 | if (linear_make_request(q, &bp->bio2)) | 353 | if (linear_make_request(q, &bp->bio2)) |
@@ -360,7 +357,8 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) | |||
360 | } | 357 | } |
361 | 358 | ||
362 | bio->bi_bdev = tmp_dev->rdev->bdev; | 359 | bio->bi_bdev = tmp_dev->rdev->bdev; |
363 | bio->bi_sector = bio->bi_sector - (tmp_dev->offset << 1) + tmp_dev->rdev->data_offset; | 360 | bio->bi_sector = bio->bi_sector - tmp_dev->start_sector |
361 | + tmp_dev->rdev->data_offset; | ||
364 | 362 | ||
365 | return 1; | 363 | return 1; |
366 | } | 364 | } |
@@ -368,29 +366,6 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) | |||
368 | static void linear_status (struct seq_file *seq, mddev_t *mddev) | 366 | static void linear_status (struct seq_file *seq, mddev_t *mddev) |
369 | { | 367 | { |
370 | 368 | ||
371 | #undef MD_DEBUG | ||
372 | #ifdef MD_DEBUG | ||
373 | int j; | ||
374 | linear_conf_t *conf = mddev_to_conf(mddev); | ||
375 | sector_t s = 0; | ||
376 | |||
377 | seq_printf(seq, " "); | ||
378 | for (j = 0; j < mddev->raid_disks; j++) | ||
379 | { | ||
380 | char b[BDEVNAME_SIZE]; | ||
381 | s += conf->smallest_size; | ||
382 | seq_printf(seq, "[%s", | ||
383 | bdevname(conf->hash_table[j][0].rdev->bdev,b)); | ||
384 | |||
385 | while (s > conf->hash_table[j][0].offset + | ||
386 | conf->hash_table[j][0].size) | ||
387 | seq_printf(seq, "/%s] ", | ||
388 | bdevname(conf->hash_table[j][1].rdev->bdev,b)); | ||
389 | else | ||
390 | seq_printf(seq, "] "); | ||
391 | } | ||
392 | seq_printf(seq, "\n"); | ||
393 | #endif | ||
394 | seq_printf(seq, " %dk rounding", mddev->chunk_size/1024); | 369 | seq_printf(seq, " %dk rounding", mddev->chunk_size/1024); |
395 | } | 370 | } |
396 | 371 | ||
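The renames above (size to num_sectors, offset to start_sector, hash_spacing to spacing, preshift to sector_shift) all serve one lookup: each hash slot covers `spacing` sectors, so a request's slot index is its pre-shifted sector divided by `spacing`, followed by a short forward scan. A standalone sketch of that lookup — not the kernel's which_dev(); the struct is simplified and the names are mine:

	struct dev_info {
		unsigned long long start_sector;  /* first array sector on this device */
		unsigned long long num_sectors;   /* sectors this device contributes */
	};

	static struct dev_info *which_dev_sketch(struct dev_info **hash_table,
						 unsigned long long spacing,
						 unsigned int sector_shift,
						 unsigned long long sector)
	{
		/* sector_shift keeps the divisor within 32 bits for sector_div() */
		struct dev_info *dev = hash_table[(sector >> sector_shift) / spacing];

		/* spacing was rounded up, so the slot estimate only errs low */
		while (sector >= dev->start_sector + dev->num_sectors)
			dev++;
		return dev;
	}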
diff --git a/drivers/md/md.c b/drivers/md/md.c index deeac4b4417..aaa3d465de4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -32,31 +32,21 @@ | |||
32 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 32 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
33 | */ | 33 | */ |
34 | 34 | ||
35 | #include <linux/module.h> | ||
36 | #include <linux/kernel.h> | ||
37 | #include <linux/kthread.h> | 35 | #include <linux/kthread.h> |
38 | #include <linux/linkage.h> | ||
39 | #include <linux/raid/md.h> | 36 | #include <linux/raid/md.h> |
40 | #include <linux/raid/bitmap.h> | 37 | #include <linux/raid/bitmap.h> |
41 | #include <linux/sysctl.h> | 38 | #include <linux/sysctl.h> |
42 | #include <linux/buffer_head.h> /* for invalidate_bdev */ | 39 | #include <linux/buffer_head.h> /* for invalidate_bdev */ |
43 | #include <linux/poll.h> | 40 | #include <linux/poll.h> |
44 | #include <linux/mutex.h> | ||
45 | #include <linux/ctype.h> | 41 | #include <linux/ctype.h> |
46 | #include <linux/freezer.h> | 42 | #include <linux/hdreg.h> |
47 | 43 | #include <linux/proc_fs.h> | |
48 | #include <linux/init.h> | 44 | #include <linux/random.h> |
49 | 45 | #include <linux/reboot.h> | |
50 | #include <linux/file.h> | 46 | #include <linux/file.h> |
51 | 47 | #include <linux/delay.h> | |
52 | #ifdef CONFIG_KMOD | ||
53 | #include <linux/kmod.h> | ||
54 | #endif | ||
55 | |||
56 | #include <asm/unaligned.h> | ||
57 | 48 | ||
58 | #define MAJOR_NR MD_MAJOR | 49 | #define MAJOR_NR MD_MAJOR |
59 | #define MD_DRIVER | ||
60 | 50 | ||
61 | /* 63 partitions with the alternate major number (mdp) */ | 51 | /* 63 partitions with the alternate major number (mdp) */ |
62 | #define MdpMinorShift 6 | 52 | #define MdpMinorShift 6 |
@@ -66,7 +56,7 @@ | |||
66 | 56 | ||
67 | 57 | ||
68 | #ifndef MODULE | 58 | #ifndef MODULE |
69 | static void autostart_arrays (int part); | 59 | static void autostart_arrays(int part); |
70 | #endif | 60 | #endif |
71 | 61 | ||
72 | static LIST_HEAD(pers_list); | 62 | static LIST_HEAD(pers_list); |
@@ -212,7 +202,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); | |||
212 | ) | 202 | ) |
213 | 203 | ||
214 | 204 | ||
215 | static int md_fail_request (struct request_queue *q, struct bio *bio) | 205 | static int md_fail_request(struct request_queue *q, struct bio *bio) |
216 | { | 206 | { |
217 | bio_io_error(bio); | 207 | bio_io_error(bio); |
218 | return 0; | 208 | return 0; |
@@ -1464,10 +1454,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1464 | if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) | 1454 | if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) |
1465 | goto fail; | 1455 | goto fail; |
1466 | 1456 | ||
1467 | if (rdev->bdev->bd_part) | 1457 | ko = &part_to_dev(rdev->bdev->bd_part)->kobj; |
1468 | ko = &rdev->bdev->bd_part->dev.kobj; | ||
1469 | else | ||
1470 | ko = &rdev->bdev->bd_disk->dev.kobj; | ||
1471 | if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { | 1458 | if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { |
1472 | kobject_del(&rdev->kobj); | 1459 | kobject_del(&rdev->kobj); |
1473 | goto fail; | 1460 | goto fail; |
@@ -2109,8 +2096,6 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2109 | 2096 | ||
2110 | if (strict_strtoull(buf, 10, &size) < 0) | 2097 | if (strict_strtoull(buf, 10, &size) < 0) |
2111 | return -EINVAL; | 2098 | return -EINVAL; |
2112 | if (size < my_mddev->size) | ||
2113 | return -EINVAL; | ||
2114 | if (my_mddev->pers && rdev->raid_disk >= 0) { | 2099 | if (my_mddev->pers && rdev->raid_disk >= 0) { |
2115 | if (my_mddev->persistent) { | 2100 | if (my_mddev->persistent) { |
2116 | size = super_types[my_mddev->major_version]. | 2101 | size = super_types[my_mddev->major_version]. |
@@ -2121,9 +2106,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2121 | size = (rdev->bdev->bd_inode->i_size >> 10); | 2106 | size = (rdev->bdev->bd_inode->i_size >> 10); |
2122 | size -= rdev->data_offset/2; | 2107 | size -= rdev->data_offset/2; |
2123 | } | 2108 | } |
2124 | if (size < my_mddev->size) | ||
2125 | return -EINVAL; /* component must fit device */ | ||
2126 | } | 2109 | } |
2110 | if (size < my_mddev->size) | ||
2111 | return -EINVAL; /* component must fit device */ | ||
2127 | 2112 | ||
2128 | rdev->size = size; | 2113 | rdev->size = size; |
2129 | if (size > oldsize && my_mddev->external) { | 2114 | if (size > oldsize && my_mddev->external) { |
@@ -2409,12 +2394,11 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) | |||
2409 | int i; | 2394 | int i; |
2410 | unsigned long msec; | 2395 | unsigned long msec; |
2411 | char buf[30]; | 2396 | char buf[30]; |
2412 | char *e; | 2397 | |
2413 | /* remove a period, and count digits after it */ | 2398 | /* remove a period, and count digits after it */ |
2414 | if (len >= sizeof(buf)) | 2399 | if (len >= sizeof(buf)) |
2415 | return -EINVAL; | 2400 | return -EINVAL; |
2416 | strlcpy(buf, cbuf, len); | 2401 | strlcpy(buf, cbuf, sizeof(buf)); |
2417 | buf[len] = 0; | ||
2418 | for (i=0; i<len; i++) { | 2402 | for (i=0; i<len; i++) { |
2419 | if (dot) { | 2403 | if (dot) { |
2420 | if (isdigit(buf[i])) { | 2404 | if (isdigit(buf[i])) { |
@@ -2427,8 +2411,7 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) | |||
2427 | buf[i] = 0; | 2411 | buf[i] = 0; |
2428 | } | 2412 | } |
2429 | } | 2413 | } |
2430 | msec = simple_strtoul(buf, &e, 10); | 2414 | if (strict_strtoul(buf, 10, &msec) < 0) |
2431 | if (e == buf || (*e && *e != '\n')) | ||
2432 | return -EINVAL; | 2415 | return -EINVAL; |
2433 | msec = (msec * 1000) / scale; | 2416 | msec = (msec * 1000) / scale; |
2434 | if (msec == 0) | 2417 | if (msec == 0) |
@@ -2730,9 +2713,9 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2730 | break; | 2713 | break; |
2731 | case read_auto: | 2714 | case read_auto: |
2732 | if (mddev->pers) { | 2715 | if (mddev->pers) { |
2733 | if (mddev->ro != 1) | 2716 | if (mddev->ro == 0) |
2734 | err = do_md_stop(mddev, 1, 0); | 2717 | err = do_md_stop(mddev, 1, 0); |
2735 | else | 2718 | else if (mddev->ro == 1) |
2736 | err = restart_array(mddev); | 2719 | err = restart_array(mddev); |
2737 | if (err == 0) { | 2720 | if (err == 0) { |
2738 | mddev->ro = 2; | 2721 | mddev->ro = 2; |
@@ -2948,7 +2931,13 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) | |||
2948 | { | 2931 | { |
2949 | int major, minor; | 2932 | int major, minor; |
2950 | char *e; | 2933 | char *e; |
2951 | if (!list_empty(&mddev->disks)) | 2934 | /* Changing the details of 'external' metadata is |
2935 | * always permitted. Otherwise there must be | ||
2936 | * no devices attached to the array. | ||
2937 | */ | ||
2938 | if (mddev->external && strncmp(buf, "external:", 9) == 0) | ||
2939 | ; | ||
2940 | else if (!list_empty(&mddev->disks)) | ||
2952 | return -EBUSY; | 2941 | return -EBUSY; |
2953 | 2942 | ||
2954 | if (cmd_match(buf, "none")) { | 2943 | if (cmd_match(buf, "none")) { |
@@ -3470,8 +3459,8 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
3470 | disk->queue = mddev->queue; | 3459 | disk->queue = mddev->queue; |
3471 | add_disk(disk); | 3460 | add_disk(disk); |
3472 | mddev->gendisk = disk; | 3461 | mddev->gendisk = disk; |
3473 | error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, | 3462 | error = kobject_init_and_add(&mddev->kobj, &md_ktype, |
3474 | "%s", "md"); | 3463 | &disk_to_dev(disk)->kobj, "%s", "md"); |
3475 | mutex_unlock(&disks_mutex); | 3464 | mutex_unlock(&disks_mutex); |
3476 | if (error) | 3465 | if (error) |
3477 | printk(KERN_WARNING "md: cannot register %s/md - name in use\n", | 3466 | printk(KERN_WARNING "md: cannot register %s/md - name in use\n", |
@@ -3530,17 +3519,12 @@ static int do_md_run(mddev_t * mddev) | |||
3530 | return -EINVAL; | 3519 | return -EINVAL; |
3531 | } | 3520 | } |
3532 | /* | 3521 | /* |
3533 | * chunk-size has to be a power of 2 and multiples of PAGE_SIZE | 3522 | * chunk-size has to be a power of 2 |
3534 | */ | 3523 | */ |
3535 | if ( (1 << ffz(~chunk_size)) != chunk_size) { | 3524 | if ( (1 << ffz(~chunk_size)) != chunk_size) { |
3536 | printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size); | 3525 | printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size); |
3537 | return -EINVAL; | 3526 | return -EINVAL; |
3538 | } | 3527 | } |
3539 | if (chunk_size < PAGE_SIZE) { | ||
3540 | printk(KERN_ERR "too small chunk_size: %d < %ld\n", | ||
3541 | chunk_size, PAGE_SIZE); | ||
3542 | return -EINVAL; | ||
3543 | } | ||
3544 | 3528 | ||
3545 | /* devices must have minimum size of one chunk */ | 3529 | /* devices must have minimum size of one chunk */ |
3546 | rdev_for_each(rdev, tmp, mddev) { | 3530 | rdev_for_each(rdev, tmp, mddev) { |
@@ -3558,12 +3542,10 @@ static int do_md_run(mddev_t * mddev) | |||
3558 | } | 3542 | } |
3559 | } | 3543 | } |
3560 | 3544 | ||
3561 | #ifdef CONFIG_KMOD | ||
3562 | if (mddev->level != LEVEL_NONE) | 3545 | if (mddev->level != LEVEL_NONE) |
3563 | request_module("md-level-%d", mddev->level); | 3546 | request_module("md-level-%d", mddev->level); |
3564 | else if (mddev->clevel[0]) | 3547 | else if (mddev->clevel[0]) |
3565 | request_module("md-%s", mddev->clevel); | 3548 | request_module("md-%s", mddev->clevel); |
3566 | #endif | ||
3567 | 3549 | ||
3568 | /* | 3550 | /* |
3569 | * Drop all container device buffers, from now on | 3551 | * Drop all container device buffers, from now on |
@@ -3761,7 +3743,7 @@ static int do_md_run(mddev_t * mddev) | |||
3761 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | 3743 | sysfs_notify(&mddev->kobj, NULL, "array_state"); |
3762 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | 3744 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); |
3763 | sysfs_notify(&mddev->kobj, NULL, "degraded"); | 3745 | sysfs_notify(&mddev->kobj, NULL, "degraded"); |
3764 | kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE); | 3746 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); |
3765 | return 0; | 3747 | return 0; |
3766 | } | 3748 | } |
3767 | 3749 | ||
@@ -3974,10 +3956,10 @@ static void autorun_array(mddev_t *mddev) | |||
3974 | } | 3956 | } |
3975 | printk("\n"); | 3957 | printk("\n"); |
3976 | 3958 | ||
3977 | err = do_md_run (mddev); | 3959 | err = do_md_run(mddev); |
3978 | if (err) { | 3960 | if (err) { |
3979 | printk(KERN_WARNING "md: do_md_run() returned %d\n", err); | 3961 | printk(KERN_WARNING "md: do_md_run() returned %d\n", err); |
3980 | do_md_stop (mddev, 0, 0); | 3962 | do_md_stop(mddev, 0, 0); |
3981 | } | 3963 | } |
3982 | } | 3964 | } |
3983 | 3965 | ||
@@ -4336,7 +4318,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
4336 | 4318 | ||
4337 | if (!(info->state & (1<<MD_DISK_FAULTY))) { | 4319 | if (!(info->state & (1<<MD_DISK_FAULTY))) { |
4338 | int err; | 4320 | int err; |
4339 | rdev = md_import_device (dev, -1, 0); | 4321 | rdev = md_import_device(dev, -1, 0); |
4340 | if (IS_ERR(rdev)) { | 4322 | if (IS_ERR(rdev)) { |
4341 | printk(KERN_WARNING | 4323 | printk(KERN_WARNING |
4342 | "md: error, md_import_device() returned %ld\n", | 4324 | "md: error, md_import_device() returned %ld\n", |
@@ -4418,7 +4400,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) | |||
4418 | return -EINVAL; | 4400 | return -EINVAL; |
4419 | } | 4401 | } |
4420 | 4402 | ||
4421 | rdev = md_import_device (dev, -1, 0); | 4403 | rdev = md_import_device(dev, -1, 0); |
4422 | if (IS_ERR(rdev)) { | 4404 | if (IS_ERR(rdev)) { |
4423 | printk(KERN_WARNING | 4405 | printk(KERN_WARNING |
4424 | "md: error, md_import_device() returned %ld\n", | 4406 | "md: error, md_import_device() returned %ld\n", |
@@ -4937,11 +4919,11 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
4937 | goto done_unlock; | 4919 | goto done_unlock; |
4938 | 4920 | ||
4939 | case STOP_ARRAY: | 4921 | case STOP_ARRAY: |
4940 | err = do_md_stop (mddev, 0, 1); | 4922 | err = do_md_stop(mddev, 0, 1); |
4941 | goto done_unlock; | 4923 | goto done_unlock; |
4942 | 4924 | ||
4943 | case STOP_ARRAY_RO: | 4925 | case STOP_ARRAY_RO: |
4944 | err = do_md_stop (mddev, 1, 1); | 4926 | err = do_md_stop(mddev, 1, 1); |
4945 | goto done_unlock; | 4927 | goto done_unlock; |
4946 | 4928 | ||
4947 | } | 4929 | } |
@@ -4990,7 +4972,7 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
4990 | goto done_unlock; | 4972 | goto done_unlock; |
4991 | 4973 | ||
4992 | case RUN_ARRAY: | 4974 | case RUN_ARRAY: |
4993 | err = do_md_run (mddev); | 4975 | err = do_md_run(mddev); |
4994 | goto done_unlock; | 4976 | goto done_unlock; |
4995 | 4977 | ||
4996 | case SET_BITMAP_FILE: | 4978 | case SET_BITMAP_FILE: |
@@ -5428,11 +5410,11 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5428 | seq_printf(seq, " super non-persistent"); | 5410 | seq_printf(seq, " super non-persistent"); |
5429 | 5411 | ||
5430 | if (mddev->pers) { | 5412 | if (mddev->pers) { |
5431 | mddev->pers->status (seq, mddev); | 5413 | mddev->pers->status(seq, mddev); |
5432 | seq_printf(seq, "\n "); | 5414 | seq_printf(seq, "\n "); |
5433 | if (mddev->pers->sync_request) { | 5415 | if (mddev->pers->sync_request) { |
5434 | if (mddev->curr_resync > 2) { | 5416 | if (mddev->curr_resync > 2) { |
5435 | status_resync (seq, mddev); | 5417 | status_resync(seq, mddev); |
5436 | seq_printf(seq, "\n "); | 5418 | seq_printf(seq, "\n "); |
5437 | } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) | 5419 | } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) |
5438 | seq_printf(seq, "\tresync=DELAYED\n "); | 5420 | seq_printf(seq, "\tresync=DELAYED\n "); |
@@ -5549,8 +5531,8 @@ static int is_mddev_idle(mddev_t *mddev) | |||
5549 | rcu_read_lock(); | 5531 | rcu_read_lock(); |
5550 | rdev_for_each_rcu(rdev, mddev) { | 5532 | rdev_for_each_rcu(rdev, mddev) { |
5551 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; | 5533 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; |
5552 | curr_events = disk_stat_read(disk, sectors[0]) + | 5534 | curr_events = part_stat_read(&disk->part0, sectors[0]) + |
5553 | disk_stat_read(disk, sectors[1]) - | 5535 | part_stat_read(&disk->part0, sectors[1]) - |
5554 | atomic_read(&disk->sync_io); | 5536 | atomic_read(&disk->sync_io); |
5555 | /* sync IO will cause sync_io to increase before the disk_stats | 5537 | /* sync IO will cause sync_io to increase before the disk_stats |
5556 | * as sync_io is counted when a request starts, and | 5538 | * as sync_io is counted when a request starts, and |
@@ -6263,7 +6245,7 @@ static int md_notify_reboot(struct notifier_block *this, | |||
6263 | * appears to still be in use. Hence | 6245 | * appears to still be in use. Hence |
6264 | * the '100'. | 6246 | * the '100'. |
6265 | */ | 6247 | */ |
6266 | do_md_stop (mddev, 1, 100); | 6248 | do_md_stop(mddev, 1, 100); |
6267 | mddev_unlock(mddev); | 6249 | mddev_unlock(mddev); |
6268 | } | 6250 | } |
6269 | /* | 6251 | /* |
@@ -6307,7 +6289,7 @@ static int __init md_init(void) | |||
6307 | raid_table_header = register_sysctl_table(raid_root_table); | 6289 | raid_table_header = register_sysctl_table(raid_root_table); |
6308 | 6290 | ||
6309 | md_geninit(); | 6291 | md_geninit(); |
6310 | return (0); | 6292 | return 0; |
6311 | } | 6293 | } |
6312 | 6294 | ||
6313 | 6295 | ||
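Among the md.c changes above, safe_delay_store() now strips the period in place and hands the remaining digits to strict_strtoul(). The arithmetic it performs, redone as a hypothetical standalone function (the kernel version works in its fixed stack buffer; this rendering is mine):

	#include <ctype.h>
	#include <stdio.h>
	#include <stdlib.h>

	static long parse_safe_delay(const char *s)
	{
		char buf[30];
		long scale = 1;
		int i = 0, dot = 0;

		for (; *s && *s != '\n' && i < (int)sizeof(buf) - 1; s++) {
			if (!dot && *s == '.') {
				dot = 1;	/* drop the period itself */
				continue;
			}
			if (dot) {
				if (!isdigit((unsigned char)*s))
					break;
				scale *= 10;	/* one power of ten per fractional digit */
			}
			buf[i++] = *s;
		}
		buf[i] = 0;

		/* "1.25" -> digits 125, scale 100 -> (125 * 1000) / 100 */
		return (strtol(buf, NULL, 10) * 1000) / scale;
	}

	int main(void)
	{
		printf("%ld\n", parse_safe_delay("1.25"));	/* prints 1250 */
		return 0;
	}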
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index c4779ccba1c..d4ac47d1127 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -19,16 +19,7 @@ | |||
19 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 19 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/raid/multipath.h> | 22 | #include <linux/raid/multipath.h> |
26 | #include <linux/buffer_head.h> | ||
27 | #include <asm/atomic.h> | ||
28 | |||
29 | #define MAJOR_NR MD_MAJOR | ||
30 | #define MD_DRIVER | ||
31 | #define MD_PERSONALITY | ||
32 | 23 | ||
33 | #define MAX_WORK_PER_DISK 128 | 24 | #define MAX_WORK_PER_DISK 128 |
34 | 25 | ||
@@ -147,6 +138,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) | |||
147 | struct multipath_bh * mp_bh; | 138 | struct multipath_bh * mp_bh; |
148 | struct multipath_info *multipath; | 139 | struct multipath_info *multipath; |
149 | const int rw = bio_data_dir(bio); | 140 | const int rw = bio_data_dir(bio); |
141 | int cpu; | ||
150 | 142 | ||
151 | if (unlikely(bio_barrier(bio))) { | 143 | if (unlikely(bio_barrier(bio))) { |
152 | bio_endio(bio, -EOPNOTSUPP); | 144 | bio_endio(bio, -EOPNOTSUPP); |
@@ -158,8 +150,11 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) | |||
158 | mp_bh->master_bio = bio; | 150 | mp_bh->master_bio = bio; |
159 | mp_bh->mddev = mddev; | 151 | mp_bh->mddev = mddev; |
160 | 152 | ||
161 | disk_stat_inc(mddev->gendisk, ios[rw]); | 153 | cpu = part_stat_lock(); |
162 | disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); | 154 | part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); |
155 | part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], | ||
156 | bio_sectors(bio)); | ||
157 | part_stat_unlock(); | ||
163 | 158 | ||
164 | mp_bh->path = multipath_map(conf); | 159 | mp_bh->path = multipath_map(conf); |
165 | if (mp_bh->path < 0) { | 160 | if (mp_bh->path < 0) { |
@@ -172,7 +167,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) | |||
172 | mp_bh->bio = *bio; | 167 | mp_bh->bio = *bio; |
173 | mp_bh->bio.bi_sector += multipath->rdev->data_offset; | 168 | mp_bh->bio.bi_sector += multipath->rdev->data_offset; |
174 | mp_bh->bio.bi_bdev = multipath->rdev->bdev; | 169 | mp_bh->bio.bi_bdev = multipath->rdev->bdev; |
175 | mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST); | 170 | mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); |
176 | mp_bh->bio.bi_end_io = multipath_end_request; | 171 | mp_bh->bio.bi_end_io = multipath_end_request; |
177 | mp_bh->bio.bi_private = mp_bh; | 172 | mp_bh->bio.bi_private = mp_bh; |
178 | generic_make_request(&mp_bh->bio); | 173 | generic_make_request(&mp_bh->bio); |
@@ -398,7 +393,7 @@ static void multipathd (mddev_t *mddev) | |||
398 | *bio = *(mp_bh->master_bio); | 393 | *bio = *(mp_bh->master_bio); |
399 | bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset; | 394 | bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset; |
400 | bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev; | 395 | bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev; |
401 | bio->bi_rw |= (1 << BIO_RW_FAILFAST); | 396 | bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); |
402 | bio->bi_end_io = multipath_end_request; | 397 | bio->bi_end_io = multipath_end_request; |
403 | bio->bi_private = mp_bh; | 398 | bio->bi_private = mp_bh; |
404 | generic_make_request(bio); | 399 | generic_make_request(bio); |
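The disk_stat_* to part_stat_* conversion seen in linear and multipath repeats verbatim in raid0, raid1, raid10 and raid5 below. The shape, annotated (assuming the part_stat macros of this kernel generation; the counters are per-CPU, hence the lock/unlock bracket around plain increments):

	int cpu;

	cpu = part_stat_lock();		/* pins this CPU and returns its id */
	/* per-CPU counters, so non-atomic increments suffice */
	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
		      bio_sectors(bio));
	part_stat_unlock();

	/* readers, e.g. is_mddev_idle() above, sum across CPUs: */
	curr_events = part_stat_read(&disk->part0, sectors[0]) +
		      part_stat_read(&disk->part0, sectors[1]);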
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 18361063566..8ac6488ad0d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -18,13 +18,8 @@ | |||
18 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 18 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/raid/raid0.h> | 21 | #include <linux/raid/raid0.h> |
23 | 22 | ||
24 | #define MAJOR_NR MD_MAJOR | ||
25 | #define MD_DRIVER | ||
26 | #define MD_PERSONALITY | ||
27 | |||
28 | static void raid0_unplug(struct request_queue *q) | 23 | static void raid0_unplug(struct request_queue *q) |
29 | { | 24 | { |
30 | mddev_t *mddev = q->queuedata; | 25 | mddev_t *mddev = q->queuedata; |
@@ -399,14 +394,18 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) | |||
399 | sector_t chunk; | 394 | sector_t chunk; |
400 | sector_t block, rsect; | 395 | sector_t block, rsect; |
401 | const int rw = bio_data_dir(bio); | 396 | const int rw = bio_data_dir(bio); |
397 | int cpu; | ||
402 | 398 | ||
403 | if (unlikely(bio_barrier(bio))) { | 399 | if (unlikely(bio_barrier(bio))) { |
404 | bio_endio(bio, -EOPNOTSUPP); | 400 | bio_endio(bio, -EOPNOTSUPP); |
405 | return 0; | 401 | return 0; |
406 | } | 402 | } |
407 | 403 | ||
408 | disk_stat_inc(mddev->gendisk, ios[rw]); | 404 | cpu = part_stat_lock(); |
409 | disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); | 405 | part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); |
406 | part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], | ||
407 | bio_sectors(bio)); | ||
408 | part_stat_unlock(); | ||
410 | 409 | ||
411 | chunk_size = mddev->chunk_size >> 10; | 410 | chunk_size = mddev->chunk_size >> 10; |
412 | chunk_sects = mddev->chunk_size >> 9; | 411 | chunk_sects = mddev->chunk_size >> 9; |
@@ -423,7 +422,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) | |||
423 | /* This is a one page bio that upper layers | 422 | /* This is a one page bio that upper layers |
424 | * refuse to split for us, so we need to split it. | 423 | * refuse to split for us, so we need to split it. |
425 | */ | 424 | */ |
426 | bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); | 425 | bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); |
427 | if (raid0_make_request(q, &bp->bio1)) | 426 | if (raid0_make_request(q, &bp->bio1)) |
428 | generic_make_request(&bp->bio1); | 427 | generic_make_request(&bp->bio1); |
429 | if (raid0_make_request(q, &bp->bio2)) | 428 | if (raid0_make_request(q, &bp->bio2)) |
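The split point passed to the new two-argument bio_split() in raid0 (and raid10 below) relies on chunk_sects being a power of two: masking with chunk_sects - 1 yields the offset into the current chunk, and the difference is how many sectors remain before the boundary. A standalone check of the arithmetic:

	#include <assert.h>

	int main(void)
	{
		unsigned long long bi_sector = 1000;	/* example start sector */
		unsigned int chunk_sects = 128;		/* 64KiB chunks, power of two */
		unsigned int split = chunk_sects - (bi_sector & (chunk_sects - 1));

		/* 1000 % 128 == 104, so 24 sectors remain in this chunk */
		assert(split == 24);
		assert((bi_sector + split) % chunk_sects == 0);
		return 0;
	}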
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 03a5ab705c2..9c788e2489b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -32,6 +32,7 @@ | |||
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include "dm-bio-list.h" | 34 | #include "dm-bio-list.h" |
35 | #include <linux/delay.h> | ||
35 | #include <linux/raid/raid1.h> | 36 | #include <linux/raid/raid1.h> |
36 | #include <linux/raid/bitmap.h> | 37 | #include <linux/raid/bitmap.h> |
37 | 38 | ||
@@ -779,7 +780,7 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
779 | struct page **behind_pages = NULL; | 780 | struct page **behind_pages = NULL; |
780 | const int rw = bio_data_dir(bio); | 781 | const int rw = bio_data_dir(bio); |
781 | const int do_sync = bio_sync(bio); | 782 | const int do_sync = bio_sync(bio); |
782 | int do_barriers; | 783 | int cpu, do_barriers; |
783 | mdk_rdev_t *blocked_rdev; | 784 | mdk_rdev_t *blocked_rdev; |
784 | 785 | ||
785 | /* | 786 | /* |
@@ -804,8 +805,11 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
804 | 805 | ||
805 | bitmap = mddev->bitmap; | 806 | bitmap = mddev->bitmap; |
806 | 807 | ||
807 | disk_stat_inc(mddev->gendisk, ios[rw]); | 808 | cpu = part_stat_lock(); |
808 | disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); | 809 | part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); |
810 | part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], | ||
811 | bio_sectors(bio)); | ||
812 | part_stat_unlock(); | ||
809 | 813 | ||
810 | /* | 814 | /* |
811 | * make_request() can abort the operation when READA is being | 815 | * make_request() can abort the operation when READA is being |
@@ -1302,9 +1306,6 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
1302 | sbio->bi_size = r1_bio->sectors << 9; | 1306 | sbio->bi_size = r1_bio->sectors << 9; |
1303 | sbio->bi_idx = 0; | 1307 | sbio->bi_idx = 0; |
1304 | sbio->bi_phys_segments = 0; | 1308 | sbio->bi_phys_segments = 0; |
1305 | sbio->bi_hw_segments = 0; | ||
1306 | sbio->bi_hw_front_size = 0; | ||
1307 | sbio->bi_hw_back_size = 0; | ||
1308 | sbio->bi_flags &= ~(BIO_POOL_MASK - 1); | 1309 | sbio->bi_flags &= ~(BIO_POOL_MASK - 1); |
1309 | sbio->bi_flags |= 1 << BIO_UPTODATE; | 1310 | sbio->bi_flags |= 1 << BIO_UPTODATE; |
1310 | sbio->bi_next = NULL; | 1311 | sbio->bi_next = NULL; |
@@ -1790,7 +1791,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1790 | bio->bi_vcnt = 0; | 1791 | bio->bi_vcnt = 0; |
1791 | bio->bi_idx = 0; | 1792 | bio->bi_idx = 0; |
1792 | bio->bi_phys_segments = 0; | 1793 | bio->bi_phys_segments = 0; |
1793 | bio->bi_hw_segments = 0; | ||
1794 | bio->bi_size = 0; | 1794 | bio->bi_size = 0; |
1795 | bio->bi_end_io = NULL; | 1795 | bio->bi_end_io = NULL; |
1796 | bio->bi_private = NULL; | 1796 | bio->bi_private = NULL; |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e34cd0e6247..da5129a24b1 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -19,6 +19,7 @@ | |||
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include "dm-bio-list.h" | 21 | #include "dm-bio-list.h" |
22 | #include <linux/delay.h> | ||
22 | #include <linux/raid/raid10.h> | 23 | #include <linux/raid/raid10.h> |
23 | #include <linux/raid/bitmap.h> | 24 | #include <linux/raid/bitmap.h> |
24 | 25 | ||
@@ -789,6 +790,7 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
789 | mirror_info_t *mirror; | 790 | mirror_info_t *mirror; |
790 | r10bio_t *r10_bio; | 791 | r10bio_t *r10_bio; |
791 | struct bio *read_bio; | 792 | struct bio *read_bio; |
793 | int cpu; | ||
792 | int i; | 794 | int i; |
793 | int chunk_sects = conf->chunk_mask + 1; | 795 | int chunk_sects = conf->chunk_mask + 1; |
794 | const int rw = bio_data_dir(bio); | 796 | const int rw = bio_data_dir(bio); |
@@ -816,7 +818,7 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
816 | /* This is a one page bio that upper layers | 818 | /* This is a one page bio that upper layers |
817 | * refuse to split for us, so we need to split it. | 819 | * refuse to split for us, so we need to split it. |
818 | */ | 820 | */ |
819 | bp = bio_split(bio, bio_split_pool, | 821 | bp = bio_split(bio, |
820 | chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); | 822 | chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); |
821 | if (make_request(q, &bp->bio1)) | 823 | if (make_request(q, &bp->bio1)) |
822 | generic_make_request(&bp->bio1); | 824 | generic_make_request(&bp->bio1); |
@@ -843,8 +845,11 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
843 | */ | 845 | */ |
844 | wait_barrier(conf); | 846 | wait_barrier(conf); |
845 | 847 | ||
846 | disk_stat_inc(mddev->gendisk, ios[rw]); | 848 | cpu = part_stat_lock(); |
847 | disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); | 849 | part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); |
850 | part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], | ||
851 | bio_sectors(bio)); | ||
852 | part_stat_unlock(); | ||
848 | 853 | ||
849 | r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); | 854 | r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); |
850 | 855 | ||
@@ -1345,9 +1350,6 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) | |||
1345 | tbio->bi_size = r10_bio->sectors << 9; | 1350 | tbio->bi_size = r10_bio->sectors << 9; |
1346 | tbio->bi_idx = 0; | 1351 | tbio->bi_idx = 0; |
1347 | tbio->bi_phys_segments = 0; | 1352 | tbio->bi_phys_segments = 0; |
1348 | tbio->bi_hw_segments = 0; | ||
1349 | tbio->bi_hw_front_size = 0; | ||
1350 | tbio->bi_hw_back_size = 0; | ||
1351 | tbio->bi_flags &= ~(BIO_POOL_MASK - 1); | 1353 | tbio->bi_flags &= ~(BIO_POOL_MASK - 1); |
1352 | tbio->bi_flags |= 1 << BIO_UPTODATE; | 1354 | tbio->bi_flags |= 1 << BIO_UPTODATE; |
1353 | tbio->bi_next = NULL; | 1355 | tbio->bi_next = NULL; |
@@ -1947,7 +1949,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1947 | bio->bi_vcnt = 0; | 1949 | bio->bi_vcnt = 0; |
1948 | bio->bi_idx = 0; | 1950 | bio->bi_idx = 0; |
1949 | bio->bi_phys_segments = 0; | 1951 | bio->bi_phys_segments = 0; |
1950 | bio->bi_hw_segments = 0; | ||
1951 | bio->bi_size = 0; | 1952 | bio->bi_size = 0; |
1952 | } | 1953 | } |
1953 | 1954 | ||
@@ -2028,8 +2029,9 @@ static int run(mddev_t *mddev) | |||
2028 | int nc, fc, fo; | 2029 | int nc, fc, fo; |
2029 | sector_t stride, size; | 2030 | sector_t stride, size; |
2030 | 2031 | ||
2031 | if (mddev->chunk_size == 0) { | 2032 | if (mddev->chunk_size < PAGE_SIZE) { |
2032 | printk(KERN_ERR "md/raid10: non-zero chunk size required.\n"); | 2033 | printk(KERN_ERR "md/raid10: chunk size must be " |
2034 | "at least PAGE_SIZE(%ld).\n", PAGE_SIZE); | ||
2033 | return -EINVAL; | 2035 | return -EINVAL; |
2034 | } | 2036 | } |
2035 | 2037 | ||
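The PAGE_SIZE floor dropped from do_md_run() resurfaces here as a per-personality check (and again in raid5 below), since linear and raid0 have no such constraint. What stays generic in do_md_run() is the terse power-of-two test: ffz(~v) is the index of v's lowest set bit, so (1 << ffz(~v)) == v exactly when a single bit is set. Restated standalone (lowest_set_bit() is my stand-in for ffz(~v); it assumes v != 0, which I read the kernel's surrounding chunk_size guard as ensuring):

	#include <assert.h>

	static int lowest_set_bit(unsigned int v)	/* plays the role of ffz(~v) */
	{
		int i = 0;

		while (!(v & 1u)) {
			v >>= 1;
			i++;
		}
		return i;
	}

	static int chunk_size_is_pow2(unsigned int chunk_size)
	{
		return (1u << lowest_set_bit(chunk_size)) == chunk_size;
	}

	int main(void)
	{
		assert(chunk_size_is_pow2(64 * 1024));
		assert(!chunk_size_is_pow2(96 * 1024));
		return 0;
	}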
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 224de022e7c..a36a7435edf 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -43,12 +43,7 @@ | |||
43 | * miss any bits. | 43 | * miss any bits. |
44 | */ | 44 | */ |
45 | 45 | ||
46 | #include <linux/module.h> | ||
47 | #include <linux/slab.h> | ||
48 | #include <linux/highmem.h> | ||
49 | #include <linux/bitops.h> | ||
50 | #include <linux/kthread.h> | 46 | #include <linux/kthread.h> |
51 | #include <asm/atomic.h> | ||
52 | #include "raid6.h" | 47 | #include "raid6.h" |
53 | 48 | ||
54 | #include <linux/raid/bitmap.h> | 49 | #include <linux/raid/bitmap.h> |
@@ -101,6 +96,40 @@ | |||
101 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | 96 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); |
102 | #endif | 97 | #endif |
103 | 98 | ||
99 | /* | ||
100 | * We maintain a biased count of active stripes in the bottom 16 bits of | ||
101 | * bi_phys_segments, and a count of processed stripes in the upper 16 bits | ||
102 | */ | ||
103 | static inline int raid5_bi_phys_segments(struct bio *bio) | ||
104 | { | ||
105 | return bio->bi_phys_segments & 0xffff; | ||
106 | } | ||
107 | |||
108 | static inline int raid5_bi_hw_segments(struct bio *bio) | ||
109 | { | ||
110 | return (bio->bi_phys_segments >> 16) & 0xffff; | ||
111 | } | ||
112 | |||
113 | static inline int raid5_dec_bi_phys_segments(struct bio *bio) | ||
114 | { | ||
115 | --bio->bi_phys_segments; | ||
116 | return raid5_bi_phys_segments(bio); | ||
117 | } | ||
118 | |||
119 | static inline int raid5_dec_bi_hw_segments(struct bio *bio) | ||
120 | { | ||
121 | unsigned short val = raid5_bi_hw_segments(bio); | ||
122 | |||
123 | --val; | ||
124 | bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); | ||
125 | return val; | ||
126 | } | ||
127 | |||
128 | static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) | ||
129 | { | ||
130 | bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16); | ||
131 | } | ||
132 | |||
104 | static inline int raid6_next_disk(int disk, int raid_disks) | 133 | static inline int raid6_next_disk(int disk, int raid_disks) |
105 | { | 134 | { |
106 | disk++; | 135 | disk++; |
@@ -241,7 +270,7 @@ static int grow_buffers(struct stripe_head *sh, int num) | |||
241 | return 0; | 270 | return 0; |
242 | } | 271 | } |
243 | 272 | ||
244 | static void raid5_build_block (struct stripe_head *sh, int i); | 273 | static void raid5_build_block(struct stripe_head *sh, int i); |
245 | 274 | ||
246 | static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks) | 275 | static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks) |
247 | { | 276 | { |
@@ -507,7 +536,7 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
507 | while (rbi && rbi->bi_sector < | 536 | while (rbi && rbi->bi_sector < |
508 | dev->sector + STRIPE_SECTORS) { | 537 | dev->sector + STRIPE_SECTORS) { |
509 | rbi2 = r5_next_bio(rbi, dev->sector); | 538 | rbi2 = r5_next_bio(rbi, dev->sector); |
510 | if (--rbi->bi_phys_segments == 0) { | 539 | if (!raid5_dec_bi_phys_segments(rbi)) { |
511 | rbi->bi_next = return_bi; | 540 | rbi->bi_next = return_bi; |
512 | return_bi = rbi; | 541 | return_bi = rbi; |
513 | } | 542 | } |
@@ -1117,7 +1146,7 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1117 | release_stripe(sh); | 1146 | release_stripe(sh); |
1118 | } | 1147 | } |
1119 | 1148 | ||
1120 | static void raid5_end_write_request (struct bio *bi, int error) | 1149 | static void raid5_end_write_request(struct bio *bi, int error) |
1121 | { | 1150 | { |
1122 | struct stripe_head *sh = bi->bi_private; | 1151 | struct stripe_head *sh = bi->bi_private; |
1123 | raid5_conf_t *conf = sh->raid_conf; | 1152 | raid5_conf_t *conf = sh->raid_conf; |
@@ -1149,7 +1178,7 @@ static void raid5_end_write_request (struct bio *bi, int error) | |||
1149 | 1178 | ||
1150 | static sector_t compute_blocknr(struct stripe_head *sh, int i); | 1179 | static sector_t compute_blocknr(struct stripe_head *sh, int i); |
1151 | 1180 | ||
1152 | static void raid5_build_block (struct stripe_head *sh, int i) | 1181 | static void raid5_build_block(struct stripe_head *sh, int i) |
1153 | { | 1182 | { |
1154 | struct r5dev *dev = &sh->dev[i]; | 1183 | struct r5dev *dev = &sh->dev[i]; |
1155 | 1184 | ||
@@ -1187,10 +1216,10 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1187 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 1216 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
1188 | } | 1217 | } |
1189 | set_bit(Faulty, &rdev->flags); | 1218 | set_bit(Faulty, &rdev->flags); |
1190 | printk (KERN_ALERT | 1219 | printk(KERN_ALERT |
1191 | "raid5: Disk failure on %s, disabling device.\n" | 1220 | "raid5: Disk failure on %s, disabling device.\n" |
1192 | "raid5: Operation continuing on %d devices.\n", | 1221 | "raid5: Operation continuing on %d devices.\n", |
1193 | bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); | 1222 | bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); |
1194 | } | 1223 | } |
1195 | } | 1224 | } |
1196 | 1225 | ||
@@ -1286,8 +1315,8 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks, | |||
1286 | *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; | 1315 | *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; |
1287 | break; | 1316 | break; |
1288 | default: | 1317 | default: |
1289 | printk (KERN_CRIT "raid6: unsupported algorithm %d\n", | 1318 | printk(KERN_CRIT "raid6: unsupported algorithm %d\n", |
1290 | conf->algorithm); | 1319 | conf->algorithm); |
1291 | } | 1320 | } |
1292 | break; | 1321 | break; |
1293 | } | 1322 | } |
@@ -1362,8 +1391,8 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) | |||
1362 | } | 1391 | } |
1363 | break; | 1392 | break; |
1364 | default: | 1393 | default: |
1365 | printk (KERN_CRIT "raid6: unsupported algorithm %d\n", | 1394 | printk(KERN_CRIT "raid6: unsupported algorithm %d\n", |
1366 | conf->algorithm); | 1395 | conf->algorithm); |
1367 | } | 1396 | } |
1368 | break; | 1397 | break; |
1369 | } | 1398 | } |
@@ -1371,7 +1400,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) | |||
1371 | chunk_number = stripe * data_disks + i; | 1400 | chunk_number = stripe * data_disks + i; |
1372 | r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset; | 1401 | r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset; |
1373 | 1402 | ||
1374 | check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf); | 1403 | check = raid5_compute_sector(r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf); |
1375 | if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { | 1404 | if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { |
1376 | printk(KERN_ERR "compute_blocknr: map not correct\n"); | 1405 | printk(KERN_ERR "compute_blocknr: map not correct\n"); |
1377 | return 0; | 1406 | return 0; |
@@ -1725,7 +1754,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
1725 | if (*bip) | 1754 | if (*bip) |
1726 | bi->bi_next = *bip; | 1755 | bi->bi_next = *bip; |
1727 | *bip = bi; | 1756 | *bip = bi; |
1728 | bi->bi_phys_segments ++; | 1757 | bi->bi_phys_segments++; |
1729 | spin_unlock_irq(&conf->device_lock); | 1758 | spin_unlock_irq(&conf->device_lock); |
1730 | spin_unlock(&sh->lock); | 1759 | spin_unlock(&sh->lock); |
1731 | 1760 | ||
@@ -1819,7 +1848,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, | |||
1819 | sh->dev[i].sector + STRIPE_SECTORS) { | 1848 | sh->dev[i].sector + STRIPE_SECTORS) { |
1820 | struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); | 1849 | struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); |
1821 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 1850 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
1822 | if (--bi->bi_phys_segments == 0) { | 1851 | if (!raid5_dec_bi_phys_segments(bi)) { |
1823 | md_write_end(conf->mddev); | 1852 | md_write_end(conf->mddev); |
1824 | bi->bi_next = *return_bi; | 1853 | bi->bi_next = *return_bi; |
1825 | *return_bi = bi; | 1854 | *return_bi = bi; |
@@ -1834,7 +1863,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, | |||
1834 | sh->dev[i].sector + STRIPE_SECTORS) { | 1863 | sh->dev[i].sector + STRIPE_SECTORS) { |
1835 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); | 1864 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); |
1836 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 1865 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
1837 | if (--bi->bi_phys_segments == 0) { | 1866 | if (!raid5_dec_bi_phys_segments(bi)) { |
1838 | md_write_end(conf->mddev); | 1867 | md_write_end(conf->mddev); |
1839 | bi->bi_next = *return_bi; | 1868 | bi->bi_next = *return_bi; |
1840 | *return_bi = bi; | 1869 | *return_bi = bi; |
@@ -1858,7 +1887,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, | |||
1858 | struct bio *nextbi = | 1887 | struct bio *nextbi = |
1859 | r5_next_bio(bi, sh->dev[i].sector); | 1888 | r5_next_bio(bi, sh->dev[i].sector); |
1860 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 1889 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
1861 | if (--bi->bi_phys_segments == 0) { | 1890 | if (!raid5_dec_bi_phys_segments(bi)) { |
1862 | bi->bi_next = *return_bi; | 1891 | bi->bi_next = *return_bi; |
1863 | *return_bi = bi; | 1892 | *return_bi = bi; |
1864 | } | 1893 | } |
@@ -2033,7 +2062,7 @@ static void handle_stripe_clean_event(raid5_conf_t *conf, | |||
2033 | while (wbi && wbi->bi_sector < | 2062 | while (wbi && wbi->bi_sector < |
2034 | dev->sector + STRIPE_SECTORS) { | 2063 | dev->sector + STRIPE_SECTORS) { |
2035 | wbi2 = r5_next_bio(wbi, dev->sector); | 2064 | wbi2 = r5_next_bio(wbi, dev->sector); |
2036 | if (--wbi->bi_phys_segments == 0) { | 2065 | if (!raid5_dec_bi_phys_segments(wbi)) { |
2037 | md_write_end(conf->mddev); | 2066 | md_write_end(conf->mddev); |
2038 | wbi->bi_next = *return_bi; | 2067 | wbi->bi_next = *return_bi; |
2039 | *return_bi = wbi; | 2068 | *return_bi = wbi; |
@@ -2814,7 +2843,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
2814 | copy_data(0, rbi, dev->page, dev->sector); | 2843 | copy_data(0, rbi, dev->page, dev->sector); |
2815 | rbi2 = r5_next_bio(rbi, dev->sector); | 2844 | rbi2 = r5_next_bio(rbi, dev->sector); |
2816 | spin_lock_irq(&conf->device_lock); | 2845 | spin_lock_irq(&conf->device_lock); |
2817 | if (--rbi->bi_phys_segments == 0) { | 2846 | if (!raid5_dec_bi_phys_segments(rbi)) { |
2818 | rbi->bi_next = return_bi; | 2847 | rbi->bi_next = return_bi; |
2819 | return_bi = rbi; | 2848 | return_bi = rbi; |
2820 | } | 2849 | } |
@@ -3155,8 +3184,11 @@ static struct bio *remove_bio_from_retry(raid5_conf_t *conf) | |||
3155 | if(bi) { | 3184 | if(bi) { |
3156 | conf->retry_read_aligned_list = bi->bi_next; | 3185 | conf->retry_read_aligned_list = bi->bi_next; |
3157 | bi->bi_next = NULL; | 3186 | bi->bi_next = NULL; |
3187 | /* | ||
3188 | * this sets the active stripe count to 1 and the processed | ||
3189 | * stripe count to zero (upper 16 bits) | ||
3190 | */ | ||
3158 | bi->bi_phys_segments = 1; /* biased count of active stripes */ | 3191 | bi->bi_phys_segments = 1; /* biased count of active stripes */ |
3159 | bi->bi_hw_segments = 0; /* count of processed stripes */ | ||
3160 | } | 3192 | } |
3161 | 3193 | ||
3162 | return bi; | 3194 | return bi; |
@@ -3206,8 +3238,7 @@ static int bio_fits_rdev(struct bio *bi) | |||
3206 | if ((bi->bi_size>>9) > q->max_sectors) | 3238 | if ((bi->bi_size>>9) > q->max_sectors) |
3207 | return 0; | 3239 | return 0; |
3208 | blk_recount_segments(q, bi); | 3240 | blk_recount_segments(q, bi); |
3209 | if (bi->bi_phys_segments > q->max_phys_segments || | 3241 | if (bi->bi_phys_segments > q->max_phys_segments) |
3210 | bi->bi_hw_segments > q->max_hw_segments) | ||
3211 | return 0; | 3242 | return 0; |
3212 | 3243 | ||
3213 | if (q->merge_bvec_fn) | 3244 | if (q->merge_bvec_fn) |
@@ -3351,7 +3382,7 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3351 | sector_t logical_sector, last_sector; | 3382 | sector_t logical_sector, last_sector; |
3352 | struct stripe_head *sh; | 3383 | struct stripe_head *sh; |
3353 | const int rw = bio_data_dir(bi); | 3384 | const int rw = bio_data_dir(bi); |
3354 | int remaining; | 3385 | int cpu, remaining; |
3355 | 3386 | ||
3356 | if (unlikely(bio_barrier(bi))) { | 3387 | if (unlikely(bio_barrier(bi))) { |
3357 | bio_endio(bi, -EOPNOTSUPP); | 3388 | bio_endio(bi, -EOPNOTSUPP); |
@@ -3360,8 +3391,11 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3360 | 3391 | ||
3361 | md_write_start(mddev, bi); | 3392 | md_write_start(mddev, bi); |
3362 | 3393 | ||
3363 | disk_stat_inc(mddev->gendisk, ios[rw]); | 3394 | cpu = part_stat_lock(); |
3364 | disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi)); | 3395 | part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); |
3396 | part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], | ||
3397 | bio_sectors(bi)); | ||
3398 | part_stat_unlock(); | ||
3365 | 3399 | ||
3366 | if (rw == READ && | 3400 | if (rw == READ && |
3367 | mddev->reshape_position == MaxSector && | 3401 | mddev->reshape_position == MaxSector && |
@@ -3468,7 +3502,7 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3468 | 3502 | ||
3469 | } | 3503 | } |
3470 | spin_lock_irq(&conf->device_lock); | 3504 | spin_lock_irq(&conf->device_lock); |
3471 | remaining = --bi->bi_phys_segments; | 3505 | remaining = raid5_dec_bi_phys_segments(bi); |
3472 | spin_unlock_irq(&conf->device_lock); | 3506 | spin_unlock_irq(&conf->device_lock); |
3473 | if (remaining == 0) { | 3507 | if (remaining == 0) { |
3474 | 3508 | ||
@@ -3752,7 +3786,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
3752 | sector += STRIPE_SECTORS, | 3786 | sector += STRIPE_SECTORS, |
3753 | scnt++) { | 3787 | scnt++) { |
3754 | 3788 | ||
3755 | if (scnt < raid_bio->bi_hw_segments) | 3789 | if (scnt < raid5_bi_hw_segments(raid_bio)) |
3756 | /* already done this stripe */ | 3790 | /* already done this stripe */ |
3757 | continue; | 3791 | continue; |
3758 | 3792 | ||
@@ -3760,7 +3794,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
3760 | 3794 | ||
3761 | if (!sh) { | 3795 | if (!sh) { |
3762 | /* failed to get a stripe - must wait */ | 3796 | /* failed to get a stripe - must wait */ |
3763 | raid_bio->bi_hw_segments = scnt; | 3797 | raid5_set_bi_hw_segments(raid_bio, scnt); |
3764 | conf->retry_read_aligned = raid_bio; | 3798 | conf->retry_read_aligned = raid_bio; |
3765 | return handled; | 3799 | return handled; |
3766 | } | 3800 | } |
@@ -3768,7 +3802,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
3768 | set_bit(R5_ReadError, &sh->dev[dd_idx].flags); | 3802 | set_bit(R5_ReadError, &sh->dev[dd_idx].flags); |
3769 | if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { | 3803 | if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { |
3770 | release_stripe(sh); | 3804 | release_stripe(sh); |
3771 | raid_bio->bi_hw_segments = scnt; | 3805 | raid5_set_bi_hw_segments(raid_bio, scnt); |
3772 | conf->retry_read_aligned = raid_bio; | 3806 | conf->retry_read_aligned = raid_bio; |
3773 | return handled; | 3807 | return handled; |
3774 | } | 3808 | } |
@@ -3778,7 +3812,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
3778 | handled++; | 3812 | handled++; |
3779 | } | 3813 | } |
3780 | spin_lock_irq(&conf->device_lock); | 3814 | spin_lock_irq(&conf->device_lock); |
3781 | remaining = --raid_bio->bi_phys_segments; | 3815 | remaining = raid5_dec_bi_phys_segments(raid_bio); |
3782 | spin_unlock_irq(&conf->device_lock); | 3816 | spin_unlock_irq(&conf->device_lock); |
3783 | if (remaining == 0) | 3817 | if (remaining == 0) |
3784 | bio_endio(raid_bio, 0); | 3818 | bio_endio(raid_bio, 0); |
@@ -3973,6 +4007,13 @@ static int run(mddev_t *mddev) | |||
3973 | return -EIO; | 4007 | return -EIO; |
3974 | } | 4008 | } |
3975 | 4009 | ||
4010 | if (mddev->chunk_size < PAGE_SIZE) { | ||
4011 | printk(KERN_ERR "md/raid5: chunk_size must be at least " | ||
4012 | "PAGE_SIZE but %d < %ld\n", | ||
4013 | mddev->chunk_size, PAGE_SIZE); | ||
4014 | return -EINVAL; | ||
4015 | } | ||
4016 | |||
3976 | if (mddev->reshape_position != MaxSector) { | 4017 | if (mddev->reshape_position != MaxSector) { |
3977 | /* Check that we can continue the reshape. | 4018 | /* Check that we can continue the reshape. |
3978 | * Currently only disks can change, it must | 4019 | * Currently only disks can change, it must |
@@ -4250,7 +4291,7 @@ static int stop(mddev_t *mddev) | |||
4250 | } | 4291 | } |
4251 | 4292 | ||
4252 | #ifdef DEBUG | 4293 | #ifdef DEBUG |
4253 | static void print_sh (struct seq_file *seq, struct stripe_head *sh) | 4294 | static void print_sh(struct seq_file *seq, struct stripe_head *sh) |
4254 | { | 4295 | { |
4255 | int i; | 4296 | int i; |
4256 | 4297 | ||
@@ -4266,7 +4307,7 @@ static void print_sh (struct seq_file *seq, struct stripe_head *sh) | |||
4266 | seq_printf(seq, "\n"); | 4307 | seq_printf(seq, "\n"); |
4267 | } | 4308 | } |
4268 | 4309 | ||
4269 | static void printall (struct seq_file *seq, raid5_conf_t *conf) | 4310 | static void printall(struct seq_file *seq, raid5_conf_t *conf) |
4270 | { | 4311 | { |
4271 | struct stripe_head *sh; | 4312 | struct stripe_head *sh; |
4272 | struct hlist_node *hn; | 4313 | struct hlist_node *hn; |
@@ -4284,7 +4325,7 @@ static void printall (struct seq_file *seq, raid5_conf_t *conf) | |||
4284 | } | 4325 | } |
4285 | #endif | 4326 | #endif |
4286 | 4327 | ||
4287 | static void status (struct seq_file *seq, mddev_t *mddev) | 4328 | static void status(struct seq_file *seq, mddev_t *mddev) |
4288 | { | 4329 | { |
4289 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | 4330 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; |
4290 | int i; | 4331 | int i; |
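The raid5_bi_*_segments helpers introduced above pack two 16-bit counters into the one bi_phys_segments word now that bi_hw_segments is gone: the biased active-stripe count in the low half, the processed-stripe count in the high half. A self-contained round-trip using copies of those helpers (struct and names simplified):

	#include <assert.h>

	struct bio_sketch { unsigned int bi_phys_segments; };

	static int phys_segs(struct bio_sketch *b)
	{
		return b->bi_phys_segments & 0xffff;
	}

	static int hw_segs(struct bio_sketch *b)
	{
		return (b->bi_phys_segments >> 16) & 0xffff;
	}

	static void set_hw_segs(struct bio_sketch *b, unsigned int cnt)
	{
		/* bitwise OR: a logical || would collapse the word to 0 or 1 */
		b->bi_phys_segments = phys_segs(b) | (cnt << 16);
	}

	int main(void)
	{
		struct bio_sketch b = { .bi_phys_segments = 1 };  /* biased active count */

		set_hw_segs(&b, 3);		/* three stripes already processed */
		assert(phys_segs(&b) == 1);	/* active count survives the update */
		assert(hw_segs(&b) == 3);
		return 0;
	}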
diff --git a/drivers/md/raid6.h b/drivers/md/raid6.h index 31cbee71365..98dcde88470 100644 --- a/drivers/md/raid6.h +++ b/drivers/md/raid6.h | |||
@@ -18,15 +18,6 @@ | |||
18 | /* Set to 1 to use kernel-wide empty_zero_page */ | 18 | /* Set to 1 to use kernel-wide empty_zero_page */ |
19 | #define RAID6_USE_EMPTY_ZERO_PAGE 0 | 19 | #define RAID6_USE_EMPTY_ZERO_PAGE 0 |
20 | 20 | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/stddef.h> | ||
23 | #include <linux/compiler.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/kernel.h> | ||
26 | #include <linux/errno.h> | ||
27 | #include <linux/mempool.h> | ||
28 | #include <linux/list.h> | ||
29 | #include <linux/vmalloc.h> | ||
30 | #include <linux/raid/md.h> | 21 | #include <linux/raid/md.h> |
31 | #include <linux/raid/raid5.h> | 22 | #include <linux/raid/raid5.h> |
32 | 23 | ||