diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-24 13:26:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-24 13:26:54 -0400 |
commit | c3cb5e193937c7aa50c323e7933507020bd26340 (patch) | |
tree | ea36213ccd29dc4caf2f729fd51b2d489b591a31 /drivers/md/dm-table.c | |
parent | ea94b5034bbebc964115f119d6cd330757fce7f9 (diff) | |
parent | f40c67f0f7e2767f80f7cbcbc1ab86c4113c202e (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (48 commits)
dm mpath: change to be request based
dm: disable interrupt when taking map_lock
dm: do not set QUEUE_ORDERED_DRAIN if request based
dm: enable request based option
dm: prepare for request based option
dm raid1: add userspace log
dm: calculate queue limits during resume not load
dm log: fix create_log_context to use logical_block_size of log device
dm target:s introduce iterate devices fn
dm table: establish queue limits by copying table limits
dm table: replace struct io_restrictions with struct queue_limits
dm table: validate device logical_block_size
dm table: ensure targets are aligned to logical_block_size
dm ioctl: support cookies for udev
dm: sysfs add suspended attribute
dm table: improve warning message when devices not freed before destruction
dm mpath: add service time load balancer
dm mpath: add queue length load balancer
dm mpath: add start_io and nr_bytes to path selectors
dm snapshot: use barrier when writing exception store
...
Diffstat (limited to 'drivers/md/dm-table.c')
-rw-r--r-- | drivers/md/dm-table.c | 465 |
1 files changed, 326 insertions, 139 deletions
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e9a73bb242b0..4899ebe767c8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
@@ -41,6 +41,7 @@ | |||
41 | struct dm_table { | 41 | struct dm_table { |
42 | struct mapped_device *md; | 42 | struct mapped_device *md; |
43 | atomic_t holders; | 43 | atomic_t holders; |
44 | unsigned type; | ||
44 | 45 | ||
45 | /* btree table */ | 46 | /* btree table */ |
46 | unsigned int depth; | 47 | unsigned int depth; |
@@ -62,15 +63,11 @@ struct dm_table { | |||
62 | /* a list of devices used by this table */ | 63 | /* a list of devices used by this table */ |
63 | struct list_head devices; | 64 | struct list_head devices; |
64 | 65 | ||
65 | /* | ||
66 | * These are optimistic limits taken from all the | ||
67 | * targets, some targets will need smaller limits. | ||
68 | */ | ||
69 | struct io_restrictions limits; | ||
70 | |||
71 | /* events get handed up using this callback */ | 66 | /* events get handed up using this callback */ |
72 | void (*event_fn)(void *); | 67 | void (*event_fn)(void *); |
73 | void *event_context; | 68 | void *event_context; |
69 | |||
70 | struct dm_md_mempools *mempools; | ||
74 | }; | 71 | }; |
75 | 72 | ||
76 | /* | 73 | /* |
@@ -89,43 +86,6 @@ static unsigned int int_log(unsigned int n, unsigned int base) | |||
89 | } | 86 | } |
90 | 87 | ||
91 | /* | 88 | /* |
92 | * Returns the minimum that is _not_ zero, unless both are zero. | ||
93 | */ | ||
94 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) | ||
95 | |||
96 | /* | ||
97 | * Combine two io_restrictions, always taking the lower value. | ||
98 | */ | ||
99 | static void combine_restrictions_low(struct io_restrictions *lhs, | ||
100 | struct io_restrictions *rhs) | ||
101 | { | ||
102 | lhs->max_sectors = | ||
103 | min_not_zero(lhs->max_sectors, rhs->max_sectors); | ||
104 | |||
105 | lhs->max_phys_segments = | ||
106 | min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments); | ||
107 | |||
108 | lhs->max_hw_segments = | ||
109 | min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments); | ||
110 | |||
111 | lhs->logical_block_size = max(lhs->logical_block_size, | ||
112 | rhs->logical_block_size); | ||
113 | |||
114 | lhs->max_segment_size = | ||
115 | min_not_zero(lhs->max_segment_size, rhs->max_segment_size); | ||
116 | |||
117 | lhs->max_hw_sectors = | ||
118 | min_not_zero(lhs->max_hw_sectors, rhs->max_hw_sectors); | ||
119 | |||
120 | lhs->seg_boundary_mask = | ||
121 | min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask); | ||
122 | |||
123 | lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn); | ||
124 | |||
125 | lhs->no_cluster |= rhs->no_cluster; | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * Calculate the index of the child node of the n'th node k'th key. | 89 | * Calculate the index of the child node of the n'th node k'th key. |
130 | */ | 90 | */ |
131 | static inline unsigned int get_child(unsigned int n, unsigned int k) | 91 | static inline unsigned int get_child(unsigned int n, unsigned int k) |
@@ -267,6 +227,8 @@ static void free_devices(struct list_head *devices) | |||
267 | list_for_each_safe(tmp, next, devices) { | 227 | list_for_each_safe(tmp, next, devices) { |
268 | struct dm_dev_internal *dd = | 228 | struct dm_dev_internal *dd = |
269 | list_entry(tmp, struct dm_dev_internal, list); | 229 | list_entry(tmp, struct dm_dev_internal, list); |
230 | DMWARN("dm_table_destroy: dm_put_device call missing for %s", | ||
231 | dd->dm_dev.name); | ||
270 | kfree(dd); | 232 | kfree(dd); |
271 | } | 233 | } |
272 | } | 234 | } |
@@ -296,12 +258,10 @@ void dm_table_destroy(struct dm_table *t) | |||
296 | vfree(t->highs); | 258 | vfree(t->highs); |
297 | 259 | ||
298 | /* free the device list */ | 260 | /* free the device list */ |
299 | if (t->devices.next != &t->devices) { | 261 | if (t->devices.next != &t->devices) |
300 | DMWARN("devices still present during destroy: " | ||
301 | "dm_table_remove_device calls missing"); | ||
302 | |||
303 | free_devices(&t->devices); | 262 | free_devices(&t->devices); |
304 | } | 263 | |
264 | dm_free_md_mempools(t->mempools); | ||
305 | 265 | ||
306 | kfree(t); | 266 | kfree(t); |
307 | } | 267 | } |
@@ -385,15 +345,48 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) | |||
385 | /* | 345 | /* |
386 | * If possible, this checks an area of a destination device is valid. | 346 | * If possible, this checks an area of a destination device is valid. |
387 | */ | 347 | */ |
388 | static int check_device_area(struct dm_dev_internal *dd, sector_t start, | 348 | static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, |
389 | sector_t len) | 349 | sector_t start, void *data) |
390 | { | 350 | { |
391 | sector_t dev_size = dd->dm_dev.bdev->bd_inode->i_size >> SECTOR_SHIFT; | 351 | struct queue_limits *limits = data; |
352 | struct block_device *bdev = dev->bdev; | ||
353 | sector_t dev_size = | ||
354 | i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; | ||
355 | unsigned short logical_block_size_sectors = | ||
356 | limits->logical_block_size >> SECTOR_SHIFT; | ||
357 | char b[BDEVNAME_SIZE]; | ||
392 | 358 | ||
393 | if (!dev_size) | 359 | if (!dev_size) |
394 | return 1; | 360 | return 1; |
395 | 361 | ||
396 | return ((start < dev_size) && (len <= (dev_size - start))); | 362 | if ((start >= dev_size) || (start + ti->len > dev_size)) { |
363 | DMWARN("%s: %s too small for target", | ||
364 | dm_device_name(ti->table->md), bdevname(bdev, b)); | ||
365 | return 0; | ||
366 | } | ||
367 | |||
368 | if (logical_block_size_sectors <= 1) | ||
369 | return 1; | ||
370 | |||
371 | if (start & (logical_block_size_sectors - 1)) { | ||
372 | DMWARN("%s: start=%llu not aligned to h/w " | ||
373 | "logical block size %hu of %s", | ||
374 | dm_device_name(ti->table->md), | ||
375 | (unsigned long long)start, | ||
376 | limits->logical_block_size, bdevname(bdev, b)); | ||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | if (ti->len & (logical_block_size_sectors - 1)) { | ||
381 | DMWARN("%s: len=%llu not aligned to h/w " | ||
382 | "logical block size %hu of %s", | ||
383 | dm_device_name(ti->table->md), | ||
384 | (unsigned long long)ti->len, | ||
385 | limits->logical_block_size, bdevname(bdev, b)); | ||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | return 1; | ||
397 | } | 390 | } |
398 | 391 | ||
399 | /* | 392 | /* |
@@ -479,38 +472,32 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, | |||
479 | } | 472 | } |
480 | atomic_inc(&dd->count); | 473 | atomic_inc(&dd->count); |
481 | 474 | ||
482 | if (!check_device_area(dd, start, len)) { | ||
483 | DMWARN("device %s too small for target", path); | ||
484 | dm_put_device(ti, &dd->dm_dev); | ||
485 | return -EINVAL; | ||
486 | } | ||
487 | |||
488 | *result = &dd->dm_dev; | 475 | *result = &dd->dm_dev; |
489 | |||
490 | return 0; | 476 | return 0; |
491 | } | 477 | } |
492 | 478 | ||
493 | void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) | 479 | /* |
480 | * Returns the minimum that is _not_ zero, unless both are zero. | ||
481 | */ | ||
482 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) | ||
483 | |||
484 | int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, | ||
485 | sector_t start, void *data) | ||
494 | { | 486 | { |
487 | struct queue_limits *limits = data; | ||
488 | struct block_device *bdev = dev->bdev; | ||
495 | struct request_queue *q = bdev_get_queue(bdev); | 489 | struct request_queue *q = bdev_get_queue(bdev); |
496 | struct io_restrictions *rs = &ti->limits; | ||
497 | char b[BDEVNAME_SIZE]; | 490 | char b[BDEVNAME_SIZE]; |
498 | 491 | ||
499 | if (unlikely(!q)) { | 492 | if (unlikely(!q)) { |
500 | DMWARN("%s: Cannot set limits for nonexistent device %s", | 493 | DMWARN("%s: Cannot set limits for nonexistent device %s", |
501 | dm_device_name(ti->table->md), bdevname(bdev, b)); | 494 | dm_device_name(ti->table->md), bdevname(bdev, b)); |
502 | return; | 495 | return 0; |
503 | } | 496 | } |
504 | 497 | ||
505 | /* | 498 | if (blk_stack_limits(limits, &q->limits, start) < 0) |
506 | * Combine the device limits low. | 499 | DMWARN("%s: target device %s is misaligned", |
507 | * | 500 | dm_device_name(ti->table->md), bdevname(bdev, b)); |
508 | * FIXME: if we move an io_restriction struct | ||
509 | * into q this would just be a call to | ||
510 | * combine_restrictions_low() | ||
511 | */ | ||
512 | rs->max_sectors = | ||
513 | min_not_zero(rs->max_sectors, queue_max_sectors(q)); | ||
514 | 501 | ||
515 | /* | 502 | /* |
516 | * Check if merge fn is supported. | 503 | * Check if merge fn is supported. |
@@ -519,48 +506,21 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) | |||
519 | */ | 506 | */ |
520 | 507 | ||
521 | if (q->merge_bvec_fn && !ti->type->merge) | 508 | if (q->merge_bvec_fn && !ti->type->merge) |
522 | rs->max_sectors = | 509 | limits->max_sectors = |
523 | min_not_zero(rs->max_sectors, | 510 | min_not_zero(limits->max_sectors, |
524 | (unsigned int) (PAGE_SIZE >> 9)); | 511 | (unsigned int) (PAGE_SIZE >> 9)); |
525 | 512 | return 0; | |
526 | rs->max_phys_segments = | ||
527 | min_not_zero(rs->max_phys_segments, | ||
528 | queue_max_phys_segments(q)); | ||
529 | |||
530 | rs->max_hw_segments = | ||
531 | min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q)); | ||
532 | |||
533 | rs->logical_block_size = max(rs->logical_block_size, | ||
534 | queue_logical_block_size(q)); | ||
535 | |||
536 | rs->max_segment_size = | ||
537 | min_not_zero(rs->max_segment_size, queue_max_segment_size(q)); | ||
538 | |||
539 | rs->max_hw_sectors = | ||
540 | min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q)); | ||
541 | |||
542 | rs->seg_boundary_mask = | ||
543 | min_not_zero(rs->seg_boundary_mask, | ||
544 | queue_segment_boundary(q)); | ||
545 | |||
546 | rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q)); | ||
547 | |||
548 | rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); | ||
549 | } | 513 | } |
550 | EXPORT_SYMBOL_GPL(dm_set_device_limits); | 514 | EXPORT_SYMBOL_GPL(dm_set_device_limits); |
551 | 515 | ||
552 | int dm_get_device(struct dm_target *ti, const char *path, sector_t start, | 516 | int dm_get_device(struct dm_target *ti, const char *path, sector_t start, |
553 | sector_t len, fmode_t mode, struct dm_dev **result) | 517 | sector_t len, fmode_t mode, struct dm_dev **result) |
554 | { | 518 | { |
555 | int r = __table_get_device(ti->table, ti, path, | 519 | return __table_get_device(ti->table, ti, path, |
556 | start, len, mode, result); | 520 | start, len, mode, result); |
557 | |||
558 | if (!r) | ||
559 | dm_set_device_limits(ti, (*result)->bdev); | ||
560 | |||
561 | return r; | ||
562 | } | 521 | } |
563 | 522 | ||
523 | |||
564 | /* | 524 | /* |
565 | * Decrement a devices use count and remove it if necessary. | 525 | * Decrement a devices use count and remove it if necessary. |
566 | */ | 526 | */ |
@@ -675,24 +635,78 @@ int dm_split_args(int *argc, char ***argvp, char *input) | |||
675 | return 0; | 635 | return 0; |
676 | } | 636 | } |
677 | 637 | ||
678 | static void check_for_valid_limits(struct io_restrictions *rs) | 638 | /* |
639 | * Impose necessary and sufficient conditions on a devices's table such | ||
640 | * that any incoming bio which respects its logical_block_size can be | ||
641 | * processed successfully. If it falls across the boundary between | ||
642 | * two or more targets, the size of each piece it gets split into must | ||
643 | * be compatible with the logical_block_size of the target processing it. | ||
644 | */ | ||
645 | static int validate_hardware_logical_block_alignment(struct dm_table *table, | ||
646 | struct queue_limits *limits) | ||
679 | { | 647 | { |
680 | if (!rs->max_sectors) | 648 | /* |
681 | rs->max_sectors = SAFE_MAX_SECTORS; | 649 | * This function uses arithmetic modulo the logical_block_size |
682 | if (!rs->max_hw_sectors) | 650 | * (in units of 512-byte sectors). |
683 | rs->max_hw_sectors = SAFE_MAX_SECTORS; | 651 | */ |
684 | if (!rs->max_phys_segments) | 652 | unsigned short device_logical_block_size_sects = |
685 | rs->max_phys_segments = MAX_PHYS_SEGMENTS; | 653 | limits->logical_block_size >> SECTOR_SHIFT; |
686 | if (!rs->max_hw_segments) | 654 | |
687 | rs->max_hw_segments = MAX_HW_SEGMENTS; | 655 | /* |
688 | if (!rs->logical_block_size) | 656 | * Offset of the start of the next table entry, mod logical_block_size. |
689 | rs->logical_block_size = 1 << SECTOR_SHIFT; | 657 | */ |
690 | if (!rs->max_segment_size) | 658 | unsigned short next_target_start = 0; |
691 | rs->max_segment_size = MAX_SEGMENT_SIZE; | 659 | |
692 | if (!rs->seg_boundary_mask) | 660 | /* |
693 | rs->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; | 661 | * Given an aligned bio that extends beyond the end of a |
694 | if (!rs->bounce_pfn) | 662 | * target, how many sectors must the next target handle? |
695 | rs->bounce_pfn = -1; | 663 | */ |
664 | unsigned short remaining = 0; | ||
665 | |||
666 | struct dm_target *uninitialized_var(ti); | ||
667 | struct queue_limits ti_limits; | ||
668 | unsigned i = 0; | ||
669 | |||
670 | /* | ||
671 | * Check each entry in the table in turn. | ||
672 | */ | ||
673 | while (i < dm_table_get_num_targets(table)) { | ||
674 | ti = dm_table_get_target(table, i++); | ||
675 | |||
676 | blk_set_default_limits(&ti_limits); | ||
677 | |||
678 | /* combine all target devices' limits */ | ||
679 | if (ti->type->iterate_devices) | ||
680 | ti->type->iterate_devices(ti, dm_set_device_limits, | ||
681 | &ti_limits); | ||
682 | |||
683 | /* | ||
684 | * If the remaining sectors fall entirely within this | ||
685 | * table entry are they compatible with its logical_block_size? | ||
686 | */ | ||
687 | if (remaining < ti->len && | ||
688 | remaining & ((ti_limits.logical_block_size >> | ||
689 | SECTOR_SHIFT) - 1)) | ||
690 | break; /* Error */ | ||
691 | |||
692 | next_target_start = | ||
693 | (unsigned short) ((next_target_start + ti->len) & | ||
694 | (device_logical_block_size_sects - 1)); | ||
695 | remaining = next_target_start ? | ||
696 | device_logical_block_size_sects - next_target_start : 0; | ||
697 | } | ||
698 | |||
699 | if (remaining) { | ||
700 | DMWARN("%s: table line %u (start sect %llu len %llu) " | ||
701 | "not aligned to h/w logical block size %hu", | ||
702 | dm_device_name(table->md), i, | ||
703 | (unsigned long long) ti->begin, | ||
704 | (unsigned long long) ti->len, | ||
705 | limits->logical_block_size); | ||
706 | return -EINVAL; | ||
707 | } | ||
708 | |||
709 | return 0; | ||
696 | } | 710 | } |
697 | 711 | ||
698 | int dm_table_add_target(struct dm_table *t, const char *type, | 712 | int dm_table_add_target(struct dm_table *t, const char *type, |
@@ -747,9 +761,6 @@ int dm_table_add_target(struct dm_table *t, const char *type, | |||
747 | 761 | ||
748 | t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; | 762 | t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; |
749 | 763 | ||
750 | /* FIXME: the plan is to combine high here and then have | ||
751 | * the merge fn apply the target level restrictions. */ | ||
752 | combine_restrictions_low(&t->limits, &tgt->limits); | ||
753 | return 0; | 764 | return 0; |
754 | 765 | ||
755 | bad: | 766 | bad: |
@@ -758,6 +769,104 @@ int dm_table_add_target(struct dm_table *t, const char *type, | |||
758 | return r; | 769 | return r; |
759 | } | 770 | } |
760 | 771 | ||
772 | int dm_table_set_type(struct dm_table *t) | ||
773 | { | ||
774 | unsigned i; | ||
775 | unsigned bio_based = 0, request_based = 0; | ||
776 | struct dm_target *tgt; | ||
777 | struct dm_dev_internal *dd; | ||
778 | struct list_head *devices; | ||
779 | |||
780 | for (i = 0; i < t->num_targets; i++) { | ||
781 | tgt = t->targets + i; | ||
782 | if (dm_target_request_based(tgt)) | ||
783 | request_based = 1; | ||
784 | else | ||
785 | bio_based = 1; | ||
786 | |||
787 | if (bio_based && request_based) { | ||
788 | DMWARN("Inconsistent table: different target types" | ||
789 | " can't be mixed up"); | ||
790 | return -EINVAL; | ||
791 | } | ||
792 | } | ||
793 | |||
794 | if (bio_based) { | ||
795 | /* We must use this table as bio-based */ | ||
796 | t->type = DM_TYPE_BIO_BASED; | ||
797 | return 0; | ||
798 | } | ||
799 | |||
800 | BUG_ON(!request_based); /* No targets in this table */ | ||
801 | |||
802 | /* Non-request-stackable devices can't be used for request-based dm */ | ||
803 | devices = dm_table_get_devices(t); | ||
804 | list_for_each_entry(dd, devices, list) { | ||
805 | if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) { | ||
806 | DMWARN("table load rejected: including" | ||
807 | " non-request-stackable devices"); | ||
808 | return -EINVAL; | ||
809 | } | ||
810 | } | ||
811 | |||
812 | /* | ||
813 | * Request-based dm supports only tables that have a single target now. | ||
814 | * To support multiple targets, request splitting support is needed, | ||
815 | * and that needs lots of changes in the block-layer. | ||
816 | * (e.g. request completion process for partial completion.) | ||
817 | */ | ||
818 | if (t->num_targets > 1) { | ||
819 | DMWARN("Request-based dm doesn't support multiple targets yet"); | ||
820 | return -EINVAL; | ||
821 | } | ||
822 | |||
823 | t->type = DM_TYPE_REQUEST_BASED; | ||
824 | |||
825 | return 0; | ||
826 | } | ||
827 | |||
828 | unsigned dm_table_get_type(struct dm_table *t) | ||
829 | { | ||
830 | return t->type; | ||
831 | } | ||
832 | |||
833 | bool dm_table_bio_based(struct dm_table *t) | ||
834 | { | ||
835 | return dm_table_get_type(t) == DM_TYPE_BIO_BASED; | ||
836 | } | ||
837 | |||
838 | bool dm_table_request_based(struct dm_table *t) | ||
839 | { | ||
840 | return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED; | ||
841 | } | ||
842 | |||
843 | int dm_table_alloc_md_mempools(struct dm_table *t) | ||
844 | { | ||
845 | unsigned type = dm_table_get_type(t); | ||
846 | |||
847 | if (unlikely(type == DM_TYPE_NONE)) { | ||
848 | DMWARN("no table type is set, can't allocate mempools"); | ||
849 | return -EINVAL; | ||
850 | } | ||
851 | |||
852 | t->mempools = dm_alloc_md_mempools(type); | ||
853 | if (!t->mempools) | ||
854 | return -ENOMEM; | ||
855 | |||
856 | return 0; | ||
857 | } | ||
858 | |||
859 | void dm_table_free_md_mempools(struct dm_table *t) | ||
860 | { | ||
861 | dm_free_md_mempools(t->mempools); | ||
862 | t->mempools = NULL; | ||
863 | } | ||
864 | |||
865 | struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t) | ||
866 | { | ||
867 | return t->mempools; | ||
868 | } | ||
869 | |||
761 | static int setup_indexes(struct dm_table *t) | 870 | static int setup_indexes(struct dm_table *t) |
762 | { | 871 | { |
763 | int i; | 872 | int i; |
@@ -792,8 +901,6 @@ int dm_table_complete(struct dm_table *t) | |||
792 | int r = 0; | 901 | int r = 0; |
793 | unsigned int leaf_nodes; | 902 | unsigned int leaf_nodes; |
794 | 903 | ||
795 | check_for_valid_limits(&t->limits); | ||
796 | |||
797 | /* how many indexes will the btree have ? */ | 904 | /* how many indexes will the btree have ? */ |
798 | leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); | 905 | leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); |
799 | t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); | 906 | t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); |
@@ -869,6 +976,57 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) | |||
869 | } | 976 | } |
870 | 977 | ||
871 | /* | 978 | /* |
979 | * Establish the new table's queue_limits and validate them. | ||
980 | */ | ||
981 | int dm_calculate_queue_limits(struct dm_table *table, | ||
982 | struct queue_limits *limits) | ||
983 | { | ||
984 | struct dm_target *uninitialized_var(ti); | ||
985 | struct queue_limits ti_limits; | ||
986 | unsigned i = 0; | ||
987 | |||
988 | blk_set_default_limits(limits); | ||
989 | |||
990 | while (i < dm_table_get_num_targets(table)) { | ||
991 | blk_set_default_limits(&ti_limits); | ||
992 | |||
993 | ti = dm_table_get_target(table, i++); | ||
994 | |||
995 | if (!ti->type->iterate_devices) | ||
996 | goto combine_limits; | ||
997 | |||
998 | /* | ||
999 | * Combine queue limits of all the devices this target uses. | ||
1000 | */ | ||
1001 | ti->type->iterate_devices(ti, dm_set_device_limits, | ||
1002 | &ti_limits); | ||
1003 | |||
1004 | /* | ||
1005 | * Check each device area is consistent with the target's | ||
1006 | * overall queue limits. | ||
1007 | */ | ||
1008 | if (!ti->type->iterate_devices(ti, device_area_is_valid, | ||
1009 | &ti_limits)) | ||
1010 | return -EINVAL; | ||
1011 | |||
1012 | combine_limits: | ||
1013 | /* | ||
1014 | * Merge this target's queue limits into the overall limits | ||
1015 | * for the table. | ||
1016 | */ | ||
1017 | if (blk_stack_limits(limits, &ti_limits, 0) < 0) | ||
1018 | DMWARN("%s: target device " | ||
1019 | "(start sect %llu len %llu) " | ||
1020 | "is misaligned", | ||
1021 | dm_device_name(table->md), | ||
1022 | (unsigned long long) ti->begin, | ||
1023 | (unsigned long long) ti->len); | ||
1024 | } | ||
1025 | |||
1026 | return validate_hardware_logical_block_alignment(table, limits); | ||
1027 | } | ||
1028 | |||
1029 | /* | ||
872 | * Set the integrity profile for this device if all devices used have | 1030 | * Set the integrity profile for this device if all devices used have |
873 | * matching profiles. | 1031 | * matching profiles. |
874 | */ | 1032 | */ |
@@ -907,27 +1065,42 @@ no_integrity: | |||
907 | return; | 1065 | return; |
908 | } | 1066 | } |
909 | 1067 | ||
910 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) | 1068 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, |
1069 | struct queue_limits *limits) | ||
911 | { | 1070 | { |
912 | /* | 1071 | /* |
913 | * Make sure we obey the optimistic sub devices | 1072 | * Each target device in the table has a data area that should normally |
914 | * restrictions. | 1073 | * be aligned such that the DM device's alignment_offset is 0. |
1074 | * FIXME: Propagate alignment_offsets up the stack and warn of | ||
1075 | * sub-optimal or inconsistent settings. | ||
1076 | */ | ||
1077 | limits->alignment_offset = 0; | ||
1078 | limits->misaligned = 0; | ||
1079 | |||
1080 | /* | ||
1081 | * Copy table's limits to the DM device's request_queue | ||
915 | */ | 1082 | */ |
916 | blk_queue_max_sectors(q, t->limits.max_sectors); | 1083 | q->limits = *limits; |
917 | blk_queue_max_phys_segments(q, t->limits.max_phys_segments); | 1084 | |
918 | blk_queue_max_hw_segments(q, t->limits.max_hw_segments); | 1085 | if (limits->no_cluster) |
919 | blk_queue_logical_block_size(q, t->limits.logical_block_size); | ||
920 | blk_queue_max_segment_size(q, t->limits.max_segment_size); | ||
921 | blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors); | ||
922 | blk_queue_segment_boundary(q, t->limits.seg_boundary_mask); | ||
923 | blk_queue_bounce_limit(q, t->limits.bounce_pfn); | ||
924 | |||
925 | if (t->limits.no_cluster) | ||
926 | queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); | 1086 | queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); |
927 | else | 1087 | else |
928 | queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); | 1088 | queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); |
929 | 1089 | ||
930 | dm_table_set_integrity(t); | 1090 | dm_table_set_integrity(t); |
1091 | |||
1092 | /* | ||
1093 | * QUEUE_FLAG_STACKABLE must be set after all queue settings are | ||
1094 | * visible to other CPUs because, once the flag is set, incoming bios | ||
1095 | * are processed by request-based dm, which refers to the queue | ||
1096 | * settings. | ||
1097 | * Until the flag set, bios are passed to bio-based dm and queued to | ||
1098 | * md->deferred where queue settings are not needed yet. | ||
1099 | * Those bios are passed to request-based dm at the resume time. | ||
1100 | */ | ||
1101 | smp_mb(); | ||
1102 | if (dm_table_request_based(t)) | ||
1103 | queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q); | ||
931 | } | 1104 | } |
932 | 1105 | ||
933 | unsigned int dm_table_get_num_targets(struct dm_table *t) | 1106 | unsigned int dm_table_get_num_targets(struct dm_table *t) |
@@ -1023,6 +1196,20 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) | |||
1023 | return r; | 1196 | return r; |
1024 | } | 1197 | } |
1025 | 1198 | ||
1199 | int dm_table_any_busy_target(struct dm_table *t) | ||
1200 | { | ||
1201 | unsigned i; | ||
1202 | struct dm_target *ti; | ||
1203 | |||
1204 | for (i = 0; i < t->num_targets; i++) { | ||
1205 | ti = t->targets + i; | ||
1206 | if (ti->type->busy && ti->type->busy(ti)) | ||
1207 | return 1; | ||
1208 | } | ||
1209 | |||
1210 | return 0; | ||
1211 | } | ||
1212 | |||
1026 | void dm_table_unplug_all(struct dm_table *t) | 1213 | void dm_table_unplug_all(struct dm_table *t) |
1027 | { | 1214 | { |
1028 | struct dm_dev_internal *dd; | 1215 | struct dm_dev_internal *dd; |