diff options
author | Tejun Heo <tj@kernel.org> | 2008-08-25 06:56:15 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2008-10-09 02:56:08 -0400 |
commit | 540eed5637b766bb1e881ef744c42617760b4815 (patch) | |
tree | 8bdf54967a8290f780216f767291623e67ee7e9b | |
parent | 074a7aca7afa6f230104e8e65eba3420263714a5 (diff) |
block: make partition array dynamic
disk->__part used to be statically allocated to the maximum possible
number of partitions. This patch makes partition array allocation
dynamic. The added overhead is minimal, as the only real change is that
one memory dereference becomes an RCU one. This saves both a bit of
memory and the cpu cycles spent iterating through unoccupied slots, and
makes increasing the partition limit easier.
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | block/genhd.c | 129 | ||||
-rw-r--r-- | block/ioctl.c | 2 | ||||
-rw-r--r-- | fs/partitions/check.c | 31 | ||||
-rw-r--r-- | include/linux/genhd.h | 19 |
4 files changed, 154 insertions, 27 deletions
diff --git a/block/genhd.c b/block/genhd.c index e1cb96fb883e..c2b14aa69d58 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -52,14 +52,21 @@ static struct device_type disk_type; | |||
52 | */ | 52 | */ |
53 | struct hd_struct *disk_get_part(struct gendisk *disk, int partno) | 53 | struct hd_struct *disk_get_part(struct gendisk *disk, int partno) |
54 | { | 54 | { |
55 | struct hd_struct *part; | 55 | struct hd_struct *part = NULL; |
56 | struct disk_part_tbl *ptbl; | ||
56 | 57 | ||
57 | if (unlikely(partno < 0 || partno >= disk_max_parts(disk))) | 58 | if (unlikely(partno < 0)) |
58 | return NULL; | 59 | return NULL; |
60 | |||
59 | rcu_read_lock(); | 61 | rcu_read_lock(); |
60 | part = rcu_dereference(disk->__part[partno]); | 62 | |
61 | if (part) | 63 | ptbl = rcu_dereference(disk->part_tbl); |
62 | get_device(part_to_dev(part)); | 64 | if (likely(partno < ptbl->len)) { |
65 | part = rcu_dereference(ptbl->part[partno]); | ||
66 | if (part) | ||
67 | get_device(part_to_dev(part)); | ||
68 | } | ||
69 | |||
63 | rcu_read_unlock(); | 70 | rcu_read_unlock(); |
64 | 71 | ||
65 | return part; | 72 | return part; |
@@ -80,17 +87,24 @@ EXPORT_SYMBOL_GPL(disk_get_part); | |||
80 | void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, | 87 | void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, |
81 | unsigned int flags) | 88 | unsigned int flags) |
82 | { | 89 | { |
90 | struct disk_part_tbl *ptbl; | ||
91 | |||
92 | rcu_read_lock(); | ||
93 | ptbl = rcu_dereference(disk->part_tbl); | ||
94 | |||
83 | piter->disk = disk; | 95 | piter->disk = disk; |
84 | piter->part = NULL; | 96 | piter->part = NULL; |
85 | 97 | ||
86 | if (flags & DISK_PITER_REVERSE) | 98 | if (flags & DISK_PITER_REVERSE) |
87 | piter->idx = disk_max_parts(piter->disk) - 1; | 99 | piter->idx = ptbl->len - 1; |
88 | else if (flags & DISK_PITER_INCL_PART0) | 100 | else if (flags & DISK_PITER_INCL_PART0) |
89 | piter->idx = 0; | 101 | piter->idx = 0; |
90 | else | 102 | else |
91 | piter->idx = 1; | 103 | piter->idx = 1; |
92 | 104 | ||
93 | piter->flags = flags; | 105 | piter->flags = flags; |
106 | |||
107 | rcu_read_unlock(); | ||
94 | } | 108 | } |
95 | EXPORT_SYMBOL_GPL(disk_part_iter_init); | 109 | EXPORT_SYMBOL_GPL(disk_part_iter_init); |
96 | 110 | ||
@@ -105,13 +119,16 @@ EXPORT_SYMBOL_GPL(disk_part_iter_init); | |||
105 | */ | 119 | */ |
106 | struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) | 120 | struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) |
107 | { | 121 | { |
122 | struct disk_part_tbl *ptbl; | ||
108 | int inc, end; | 123 | int inc, end; |
109 | 124 | ||
110 | /* put the last partition */ | 125 | /* put the last partition */ |
111 | disk_put_part(piter->part); | 126 | disk_put_part(piter->part); |
112 | piter->part = NULL; | 127 | piter->part = NULL; |
113 | 128 | ||
129 | /* get part_tbl */ | ||
114 | rcu_read_lock(); | 130 | rcu_read_lock(); |
131 | ptbl = rcu_dereference(piter->disk->part_tbl); | ||
115 | 132 | ||
116 | /* determine iteration parameters */ | 133 | /* determine iteration parameters */ |
117 | if (piter->flags & DISK_PITER_REVERSE) { | 134 | if (piter->flags & DISK_PITER_REVERSE) { |
@@ -122,14 +139,14 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) | |||
122 | end = 0; | 139 | end = 0; |
123 | } else { | 140 | } else { |
124 | inc = 1; | 141 | inc = 1; |
125 | end = disk_max_parts(piter->disk); | 142 | end = ptbl->len; |
126 | } | 143 | } |
127 | 144 | ||
128 | /* iterate to the next partition */ | 145 | /* iterate to the next partition */ |
129 | for (; piter->idx != end; piter->idx += inc) { | 146 | for (; piter->idx != end; piter->idx += inc) { |
130 | struct hd_struct *part; | 147 | struct hd_struct *part; |
131 | 148 | ||
132 | part = rcu_dereference(piter->disk->__part[piter->idx]); | 149 | part = rcu_dereference(ptbl->part[piter->idx]); |
133 | if (!part) | 150 | if (!part) |
134 | continue; | 151 | continue; |
135 | if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) | 152 | if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) |
@@ -180,10 +197,13 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit); | |||
180 | */ | 197 | */ |
181 | struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) | 198 | struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) |
182 | { | 199 | { |
200 | struct disk_part_tbl *ptbl; | ||
183 | int i; | 201 | int i; |
184 | 202 | ||
185 | for (i = 1; i < disk_max_parts(disk); i++) { | 203 | ptbl = rcu_dereference(disk->part_tbl); |
186 | struct hd_struct *part = rcu_dereference(disk->__part[i]); | 204 | |
205 | for (i = 1; i < ptbl->len; i++) { | ||
206 | struct hd_struct *part = rcu_dereference(ptbl->part[i]); | ||
187 | 207 | ||
188 | if (part && part->start_sect <= sector && | 208 | if (part && part->start_sect <= sector && |
189 | sector < part->start_sect + part->nr_sects) | 209 | sector < part->start_sect + part->nr_sects) |
@@ -798,12 +818,86 @@ static struct attribute_group *disk_attr_groups[] = { | |||
798 | NULL | 818 | NULL |
799 | }; | 819 | }; |
800 | 820 | ||
821 | static void disk_free_ptbl_rcu_cb(struct rcu_head *head) | ||
822 | { | ||
823 | struct disk_part_tbl *ptbl = | ||
824 | container_of(head, struct disk_part_tbl, rcu_head); | ||
825 | |||
826 | kfree(ptbl); | ||
827 | } | ||
828 | |||
829 | /** | ||
830 | * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way | ||
831 | * @disk: disk to replace part_tbl for | ||
832 | * @new_ptbl: new part_tbl to install | ||
833 | * | ||
834 | * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The | ||
835 | * original ptbl is freed using RCU callback. | ||
836 | * | ||
837 | * LOCKING: | ||
838 | * Matching bd_mutx locked. | ||
839 | */ | ||
840 | static void disk_replace_part_tbl(struct gendisk *disk, | ||
841 | struct disk_part_tbl *new_ptbl) | ||
842 | { | ||
843 | struct disk_part_tbl *old_ptbl = disk->part_tbl; | ||
844 | |||
845 | rcu_assign_pointer(disk->part_tbl, new_ptbl); | ||
846 | if (old_ptbl) | ||
847 | call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb); | ||
848 | } | ||
849 | |||
850 | /** | ||
851 | * disk_expand_part_tbl - expand disk->part_tbl | ||
852 | * @disk: disk to expand part_tbl for | ||
853 | * @partno: expand such that this partno can fit in | ||
854 | * | ||
855 | * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl | ||
856 | * uses RCU to allow unlocked dereferencing for stats and other stuff. | ||
857 | * | ||
858 | * LOCKING: | ||
859 | * Matching bd_mutex locked, might sleep. | ||
860 | * | ||
861 | * RETURNS: | ||
862 | * 0 on success, -errno on failure. | ||
863 | */ | ||
864 | int disk_expand_part_tbl(struct gendisk *disk, int partno) | ||
865 | { | ||
866 | struct disk_part_tbl *old_ptbl = disk->part_tbl; | ||
867 | struct disk_part_tbl *new_ptbl; | ||
868 | int len = old_ptbl ? old_ptbl->len : 0; | ||
869 | int target = partno + 1; | ||
870 | size_t size; | ||
871 | int i; | ||
872 | |||
873 | /* disk_max_parts() is zero during initialization, ignore if so */ | ||
874 | if (disk_max_parts(disk) && target > disk_max_parts(disk)) | ||
875 | return -EINVAL; | ||
876 | |||
877 | if (target <= len) | ||
878 | return 0; | ||
879 | |||
880 | size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]); | ||
881 | new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id); | ||
882 | if (!new_ptbl) | ||
883 | return -ENOMEM; | ||
884 | |||
885 | INIT_RCU_HEAD(&new_ptbl->rcu_head); | ||
886 | new_ptbl->len = target; | ||
887 | |||
888 | for (i = 0; i < len; i++) | ||
889 | rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]); | ||
890 | |||
891 | disk_replace_part_tbl(disk, new_ptbl); | ||
892 | return 0; | ||
893 | } | ||
894 | |||
801 | static void disk_release(struct device *dev) | 895 | static void disk_release(struct device *dev) |
802 | { | 896 | { |
803 | struct gendisk *disk = dev_to_disk(dev); | 897 | struct gendisk *disk = dev_to_disk(dev); |
804 | 898 | ||
805 | kfree(disk->random); | 899 | kfree(disk->random); |
806 | kfree(disk->__part); | 900 | disk_replace_part_tbl(disk, NULL); |
807 | free_part_stats(&disk->part0); | 901 | free_part_stats(&disk->part0); |
808 | kfree(disk); | 902 | kfree(disk); |
809 | } | 903 | } |
@@ -948,22 +1042,16 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) | |||
948 | disk = kmalloc_node(sizeof(struct gendisk), | 1042 | disk = kmalloc_node(sizeof(struct gendisk), |
949 | GFP_KERNEL | __GFP_ZERO, node_id); | 1043 | GFP_KERNEL | __GFP_ZERO, node_id); |
950 | if (disk) { | 1044 | if (disk) { |
951 | int tot_minors = minors + ext_minors; | ||
952 | int size = tot_minors * sizeof(struct hd_struct *); | ||
953 | |||
954 | if (!init_part_stats(&disk->part0)) { | 1045 | if (!init_part_stats(&disk->part0)) { |
955 | kfree(disk); | 1046 | kfree(disk); |
956 | return NULL; | 1047 | return NULL; |
957 | } | 1048 | } |
958 | 1049 | if (disk_expand_part_tbl(disk, 0)) { | |
959 | disk->__part = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, | 1050 | free_part_stats(&disk->part0); |
960 | node_id); | ||
961 | if (!disk->__part) { | ||
962 | free_part_stats(&disk->part0); | ||
963 | kfree(disk); | 1051 | kfree(disk); |
964 | return NULL; | 1052 | return NULL; |
965 | } | 1053 | } |
966 | disk->__part[0] = &disk->part0; | 1054 | disk->part_tbl->part[0] = &disk->part0; |
967 | 1055 | ||
968 | disk->minors = minors; | 1056 | disk->minors = minors; |
969 | disk->ext_minors = ext_minors; | 1057 | disk->ext_minors = ext_minors; |
@@ -973,6 +1061,7 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) | |||
973 | device_initialize(disk_to_dev(disk)); | 1061 | device_initialize(disk_to_dev(disk)); |
974 | INIT_WORK(&disk->async_notify, | 1062 | INIT_WORK(&disk->async_notify, |
975 | media_change_notify_thread); | 1063 | media_change_notify_thread); |
1064 | disk->node_id = node_id; | ||
976 | } | 1065 | } |
977 | return disk; | 1066 | return disk; |
978 | } | 1067 | } |
diff --git a/block/ioctl.c b/block/ioctl.c index 64e7c67a64b0..38bee321e1fa 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -30,7 +30,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
30 | if (bdev != bdev->bd_contains) | 30 | if (bdev != bdev->bd_contains) |
31 | return -EINVAL; | 31 | return -EINVAL; |
32 | partno = p.pno; | 32 | partno = p.pno; |
33 | if (partno <= 0 || partno >= disk_max_parts(disk)) | 33 | if (partno <= 0) |
34 | return -EINVAL; | 34 | return -EINVAL; |
35 | switch (a.op) { | 35 | switch (a.op) { |
36 | case BLKPG_ADD_PARTITION: | 36 | case BLKPG_ADD_PARTITION: |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index f517869e8d10..772b2ed8d239 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -312,14 +312,18 @@ static void delete_partition_rcu_cb(struct rcu_head *head) | |||
312 | 312 | ||
313 | void delete_partition(struct gendisk *disk, int partno) | 313 | void delete_partition(struct gendisk *disk, int partno) |
314 | { | 314 | { |
315 | struct disk_part_tbl *ptbl = disk->part_tbl; | ||
315 | struct hd_struct *part; | 316 | struct hd_struct *part; |
316 | 317 | ||
317 | part = disk->__part[partno]; | 318 | if (partno >= ptbl->len) |
319 | return; | ||
320 | |||
321 | part = ptbl->part[partno]; | ||
318 | if (!part) | 322 | if (!part) |
319 | return; | 323 | return; |
320 | 324 | ||
321 | blk_free_devt(part_devt(part)); | 325 | blk_free_devt(part_devt(part)); |
322 | rcu_assign_pointer(disk->__part[partno], NULL); | 326 | rcu_assign_pointer(ptbl->part[partno], NULL); |
323 | kobject_put(part->holder_dir); | 327 | kobject_put(part->holder_dir); |
324 | device_del(part_to_dev(part)); | 328 | device_del(part_to_dev(part)); |
325 | 329 | ||
@@ -341,10 +345,16 @@ int add_partition(struct gendisk *disk, int partno, | |||
341 | dev_t devt = MKDEV(0, 0); | 345 | dev_t devt = MKDEV(0, 0); |
342 | struct device *ddev = disk_to_dev(disk); | 346 | struct device *ddev = disk_to_dev(disk); |
343 | struct device *pdev; | 347 | struct device *pdev; |
348 | struct disk_part_tbl *ptbl; | ||
344 | const char *dname; | 349 | const char *dname; |
345 | int err; | 350 | int err; |
346 | 351 | ||
347 | if (disk->__part[partno]) | 352 | err = disk_expand_part_tbl(disk, partno); |
353 | if (err) | ||
354 | return err; | ||
355 | ptbl = disk->part_tbl; | ||
356 | |||
357 | if (ptbl->part[partno]) | ||
348 | return -EBUSY; | 358 | return -EBUSY; |
349 | 359 | ||
350 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 360 | p = kzalloc(sizeof(*p), GFP_KERNEL); |
@@ -398,7 +408,7 @@ int add_partition(struct gendisk *disk, int partno, | |||
398 | 408 | ||
399 | /* everything is up and running, commence */ | 409 | /* everything is up and running, commence */ |
400 | INIT_RCU_HEAD(&p->rcu_head); | 410 | INIT_RCU_HEAD(&p->rcu_head); |
401 | rcu_assign_pointer(disk->__part[partno], p); | 411 | rcu_assign_pointer(ptbl->part[partno], p); |
402 | 412 | ||
403 | /* suppress uevent if the disk suppresses it */ | 413 | /* suppress uevent if the disk suppresses it */ |
404 | if (!ddev->uevent_suppress) | 414 | if (!ddev->uevent_suppress) |
@@ -487,7 +497,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | |||
487 | struct disk_part_iter piter; | 497 | struct disk_part_iter piter; |
488 | struct hd_struct *part; | 498 | struct hd_struct *part; |
489 | struct parsed_partitions *state; | 499 | struct parsed_partitions *state; |
490 | int p, res; | 500 | int p, highest, res; |
491 | 501 | ||
492 | if (bdev->bd_part_count) | 502 | if (bdev->bd_part_count) |
493 | return -EBUSY; | 503 | return -EBUSY; |
@@ -511,6 +521,17 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | |||
511 | /* tell userspace that the media / partition table may have changed */ | 521 | /* tell userspace that the media / partition table may have changed */ |
512 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); | 522 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); |
513 | 523 | ||
524 | /* Detect the highest partition number and preallocate | ||
525 | * disk->part_tbl. This is an optimization and not strictly | ||
526 | * necessary. | ||
527 | */ | ||
528 | for (p = 1, highest = 0; p < state->limit; p++) | ||
529 | if (state->parts[p].size) | ||
530 | highest = p; | ||
531 | |||
532 | disk_expand_part_tbl(disk, highest); | ||
533 | |||
534 | /* add partitions */ | ||
514 | for (p = 1; p < state->limit; p++) { | 535 | for (p = 1; p < state->limit; p++) { |
515 | sector_t size = state->parts[p].size; | 536 | sector_t size = state->parts[p].size; |
516 | sector_t from = state->parts[p].from; | 537 | sector_t from = state->parts[p].from; |
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c90e1b4fbe5a..ecf649c3deed 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h | |||
@@ -113,6 +113,21 @@ struct hd_struct { | |||
113 | #define GENHD_FL_UP 16 | 113 | #define GENHD_FL_UP 16 |
114 | #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 | 114 | #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 |
115 | 115 | ||
116 | #define BLK_SCSI_MAX_CMDS (256) | ||
117 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) | ||
118 | |||
119 | struct blk_scsi_cmd_filter { | ||
120 | unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; | ||
121 | unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; | ||
122 | struct kobject kobj; | ||
123 | }; | ||
124 | |||
125 | struct disk_part_tbl { | ||
126 | struct rcu_head rcu_head; | ||
127 | int len; | ||
128 | struct hd_struct *part[]; | ||
129 | }; | ||
130 | |||
116 | struct gendisk { | 131 | struct gendisk { |
117 | /* major, first_minor, minors and ext_minors are input | 132 | /* major, first_minor, minors and ext_minors are input |
118 | * parameters only, don't use directly. Use disk_devt() and | 133 | * parameters only, don't use directly. Use disk_devt() and |
@@ -131,7 +146,7 @@ struct gendisk { | |||
131 | * non-critical accesses use RCU. Always access through | 146 | * non-critical accesses use RCU. Always access through |
132 | * helpers. | 147 | * helpers. |
133 | */ | 148 | */ |
134 | struct hd_struct **__part; | 149 | struct disk_part_tbl *part_tbl; |
135 | struct hd_struct part0; | 150 | struct hd_struct part0; |
136 | 151 | ||
137 | struct block_device_operations *fops; | 152 | struct block_device_operations *fops; |
@@ -149,6 +164,7 @@ struct gendisk { | |||
149 | #ifdef CONFIG_BLK_DEV_INTEGRITY | 164 | #ifdef CONFIG_BLK_DEV_INTEGRITY |
150 | struct blk_integrity *integrity; | 165 | struct blk_integrity *integrity; |
151 | #endif | 166 | #endif |
167 | int node_id; | ||
152 | }; | 168 | }; |
153 | 169 | ||
154 | static inline struct gendisk *part_to_disk(struct hd_struct *part) | 170 | static inline struct gendisk *part_to_disk(struct hd_struct *part) |
@@ -503,6 +519,7 @@ extern void blk_free_devt(dev_t devt); | |||
503 | extern dev_t blk_lookup_devt(const char *name, int partno); | 519 | extern dev_t blk_lookup_devt(const char *name, int partno); |
504 | extern char *disk_name (struct gendisk *hd, int partno, char *buf); | 520 | extern char *disk_name (struct gendisk *hd, int partno, char *buf); |
505 | 521 | ||
522 | extern int disk_expand_part_tbl(struct gendisk *disk, int target); | ||
506 | extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); | 523 | extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); |
507 | extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); | 524 | extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); |
508 | extern void delete_partition(struct gendisk *, int); | 525 | extern void delete_partition(struct gendisk *, int); |