aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2008-08-25 06:56:15 -0400
committerJens Axboe <jens.axboe@oracle.com>2008-10-09 02:56:08 -0400
commit540eed5637b766bb1e881ef744c42617760b4815 (patch)
tree8bdf54967a8290f780216f767291623e67ee7e9b
parent074a7aca7afa6f230104e8e65eba3420263714a5 (diff)
block: make partition array dynamic
disk->__part used to be statically allocated to the maximum possible number of partitions. This patch makes the partition array allocation dynamic. The added overhead is minimal, as the only real change is that one plain memory dereference becomes an RCU dereference. This saves a bit of memory, saves the CPU cycles spent iterating through unoccupied slots, and makes increasing the partition limit easier. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r--block/genhd.c129
-rw-r--r--block/ioctl.c2
-rw-r--r--fs/partitions/check.c31
-rw-r--r--include/linux/genhd.h19
4 files changed, 154 insertions, 27 deletions
diff --git a/block/genhd.c b/block/genhd.c
index e1cb96fb883e..c2b14aa69d58 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -52,14 +52,21 @@ static struct device_type disk_type;
52 */ 52 */
53struct hd_struct *disk_get_part(struct gendisk *disk, int partno) 53struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
54{ 54{
55 struct hd_struct *part; 55 struct hd_struct *part = NULL;
56 struct disk_part_tbl *ptbl;
56 57
57 if (unlikely(partno < 0 || partno >= disk_max_parts(disk))) 58 if (unlikely(partno < 0))
58 return NULL; 59 return NULL;
60
59 rcu_read_lock(); 61 rcu_read_lock();
60 part = rcu_dereference(disk->__part[partno]); 62
61 if (part) 63 ptbl = rcu_dereference(disk->part_tbl);
62 get_device(part_to_dev(part)); 64 if (likely(partno < ptbl->len)) {
65 part = rcu_dereference(ptbl->part[partno]);
66 if (part)
67 get_device(part_to_dev(part));
68 }
69
63 rcu_read_unlock(); 70 rcu_read_unlock();
64 71
65 return part; 72 return part;
@@ -80,17 +87,24 @@ EXPORT_SYMBOL_GPL(disk_get_part);
80void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, 87void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
81 unsigned int flags) 88 unsigned int flags)
82{ 89{
90 struct disk_part_tbl *ptbl;
91
92 rcu_read_lock();
93 ptbl = rcu_dereference(disk->part_tbl);
94
83 piter->disk = disk; 95 piter->disk = disk;
84 piter->part = NULL; 96 piter->part = NULL;
85 97
86 if (flags & DISK_PITER_REVERSE) 98 if (flags & DISK_PITER_REVERSE)
87 piter->idx = disk_max_parts(piter->disk) - 1; 99 piter->idx = ptbl->len - 1;
88 else if (flags & DISK_PITER_INCL_PART0) 100 else if (flags & DISK_PITER_INCL_PART0)
89 piter->idx = 0; 101 piter->idx = 0;
90 else 102 else
91 piter->idx = 1; 103 piter->idx = 1;
92 104
93 piter->flags = flags; 105 piter->flags = flags;
106
107 rcu_read_unlock();
94} 108}
95EXPORT_SYMBOL_GPL(disk_part_iter_init); 109EXPORT_SYMBOL_GPL(disk_part_iter_init);
96 110
@@ -105,13 +119,16 @@ EXPORT_SYMBOL_GPL(disk_part_iter_init);
105 */ 119 */
106struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) 120struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
107{ 121{
122 struct disk_part_tbl *ptbl;
108 int inc, end; 123 int inc, end;
109 124
110 /* put the last partition */ 125 /* put the last partition */
111 disk_put_part(piter->part); 126 disk_put_part(piter->part);
112 piter->part = NULL; 127 piter->part = NULL;
113 128
129 /* get part_tbl */
114 rcu_read_lock(); 130 rcu_read_lock();
131 ptbl = rcu_dereference(piter->disk->part_tbl);
115 132
116 /* determine iteration parameters */ 133 /* determine iteration parameters */
117 if (piter->flags & DISK_PITER_REVERSE) { 134 if (piter->flags & DISK_PITER_REVERSE) {
@@ -122,14 +139,14 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
122 end = 0; 139 end = 0;
123 } else { 140 } else {
124 inc = 1; 141 inc = 1;
125 end = disk_max_parts(piter->disk); 142 end = ptbl->len;
126 } 143 }
127 144
128 /* iterate to the next partition */ 145 /* iterate to the next partition */
129 for (; piter->idx != end; piter->idx += inc) { 146 for (; piter->idx != end; piter->idx += inc) {
130 struct hd_struct *part; 147 struct hd_struct *part;
131 148
132 part = rcu_dereference(piter->disk->__part[piter->idx]); 149 part = rcu_dereference(ptbl->part[piter->idx]);
133 if (!part) 150 if (!part)
134 continue; 151 continue;
135 if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) 152 if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
@@ -180,10 +197,13 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
180 */ 197 */
181struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) 198struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
182{ 199{
200 struct disk_part_tbl *ptbl;
183 int i; 201 int i;
184 202
185 for (i = 1; i < disk_max_parts(disk); i++) { 203 ptbl = rcu_dereference(disk->part_tbl);
186 struct hd_struct *part = rcu_dereference(disk->__part[i]); 204
205 for (i = 1; i < ptbl->len; i++) {
206 struct hd_struct *part = rcu_dereference(ptbl->part[i]);
187 207
188 if (part && part->start_sect <= sector && 208 if (part && part->start_sect <= sector &&
189 sector < part->start_sect + part->nr_sects) 209 sector < part->start_sect + part->nr_sects)
@@ -798,12 +818,86 @@ static struct attribute_group *disk_attr_groups[] = {
798 NULL 818 NULL
799}; 819};
800 820
821static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
822{
823 struct disk_part_tbl *ptbl =
824 container_of(head, struct disk_part_tbl, rcu_head);
825
826 kfree(ptbl);
827}
828
829/**
830 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
831 * @disk: disk to replace part_tbl for
832 * @new_ptbl: new part_tbl to install
833 *
834 * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The
835 * original ptbl is freed using RCU callback.
836 *
837 * LOCKING:
838 * Matching bd_mutex locked.
839 */
840static void disk_replace_part_tbl(struct gendisk *disk,
841 struct disk_part_tbl *new_ptbl)
842{
843 struct disk_part_tbl *old_ptbl = disk->part_tbl;
844
845 rcu_assign_pointer(disk->part_tbl, new_ptbl);
846 if (old_ptbl)
847 call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
848}
849
850/**
851 * disk_expand_part_tbl - expand disk->part_tbl
852 * @disk: disk to expand part_tbl for
853 * @partno: expand such that this partno can fit in
854 *
855 * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl
856 * uses RCU to allow unlocked dereferencing for stats and other stuff.
857 *
858 * LOCKING:
859 * Matching bd_mutex locked, might sleep.
860 *
861 * RETURNS:
862 * 0 on success, -errno on failure.
863 */
864int disk_expand_part_tbl(struct gendisk *disk, int partno)
865{
866 struct disk_part_tbl *old_ptbl = disk->part_tbl;
867 struct disk_part_tbl *new_ptbl;
868 int len = old_ptbl ? old_ptbl->len : 0;
869 int target = partno + 1;
870 size_t size;
871 int i;
872
873 /* disk_max_parts() is zero during initialization, ignore if so */
874 if (disk_max_parts(disk) && target > disk_max_parts(disk))
875 return -EINVAL;
876
877 if (target <= len)
878 return 0;
879
880 size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
881 new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
882 if (!new_ptbl)
883 return -ENOMEM;
884
885 INIT_RCU_HEAD(&new_ptbl->rcu_head);
886 new_ptbl->len = target;
887
888 for (i = 0; i < len; i++)
889 rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
890
891 disk_replace_part_tbl(disk, new_ptbl);
892 return 0;
893}
894
801static void disk_release(struct device *dev) 895static void disk_release(struct device *dev)
802{ 896{
803 struct gendisk *disk = dev_to_disk(dev); 897 struct gendisk *disk = dev_to_disk(dev);
804 898
805 kfree(disk->random); 899 kfree(disk->random);
806 kfree(disk->__part); 900 disk_replace_part_tbl(disk, NULL);
807 free_part_stats(&disk->part0); 901 free_part_stats(&disk->part0);
808 kfree(disk); 902 kfree(disk);
809} 903}
@@ -948,22 +1042,16 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id)
948 disk = kmalloc_node(sizeof(struct gendisk), 1042 disk = kmalloc_node(sizeof(struct gendisk),
949 GFP_KERNEL | __GFP_ZERO, node_id); 1043 GFP_KERNEL | __GFP_ZERO, node_id);
950 if (disk) { 1044 if (disk) {
951 int tot_minors = minors + ext_minors;
952 int size = tot_minors * sizeof(struct hd_struct *);
953
954 if (!init_part_stats(&disk->part0)) { 1045 if (!init_part_stats(&disk->part0)) {
955 kfree(disk); 1046 kfree(disk);
956 return NULL; 1047 return NULL;
957 } 1048 }
958 1049 if (disk_expand_part_tbl(disk, 0)) {
959 disk->__part = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, 1050 free_part_stats(&disk->part0);
960 node_id);
961 if (!disk->__part) {
962 free_part_stats(&disk->part0);
963 kfree(disk); 1051 kfree(disk);
964 return NULL; 1052 return NULL;
965 } 1053 }
966 disk->__part[0] = &disk->part0; 1054 disk->part_tbl->part[0] = &disk->part0;
967 1055
968 disk->minors = minors; 1056 disk->minors = minors;
969 disk->ext_minors = ext_minors; 1057 disk->ext_minors = ext_minors;
@@ -973,6 +1061,7 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id)
973 device_initialize(disk_to_dev(disk)); 1061 device_initialize(disk_to_dev(disk));
974 INIT_WORK(&disk->async_notify, 1062 INIT_WORK(&disk->async_notify,
975 media_change_notify_thread); 1063 media_change_notify_thread);
1064 disk->node_id = node_id;
976 } 1065 }
977 return disk; 1066 return disk;
978} 1067}
diff --git a/block/ioctl.c b/block/ioctl.c
index 64e7c67a64b0..38bee321e1fa 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -30,7 +30,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
30 if (bdev != bdev->bd_contains) 30 if (bdev != bdev->bd_contains)
31 return -EINVAL; 31 return -EINVAL;
32 partno = p.pno; 32 partno = p.pno;
33 if (partno <= 0 || partno >= disk_max_parts(disk)) 33 if (partno <= 0)
34 return -EINVAL; 34 return -EINVAL;
35 switch (a.op) { 35 switch (a.op) {
36 case BLKPG_ADD_PARTITION: 36 case BLKPG_ADD_PARTITION:
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index f517869e8d10..772b2ed8d239 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -312,14 +312,18 @@ static void delete_partition_rcu_cb(struct rcu_head *head)
312 312
313void delete_partition(struct gendisk *disk, int partno) 313void delete_partition(struct gendisk *disk, int partno)
314{ 314{
315 struct disk_part_tbl *ptbl = disk->part_tbl;
315 struct hd_struct *part; 316 struct hd_struct *part;
316 317
317 part = disk->__part[partno]; 318 if (partno >= ptbl->len)
319 return;
320
321 part = ptbl->part[partno];
318 if (!part) 322 if (!part)
319 return; 323 return;
320 324
321 blk_free_devt(part_devt(part)); 325 blk_free_devt(part_devt(part));
322 rcu_assign_pointer(disk->__part[partno], NULL); 326 rcu_assign_pointer(ptbl->part[partno], NULL);
323 kobject_put(part->holder_dir); 327 kobject_put(part->holder_dir);
324 device_del(part_to_dev(part)); 328 device_del(part_to_dev(part));
325 329
@@ -341,10 +345,16 @@ int add_partition(struct gendisk *disk, int partno,
341 dev_t devt = MKDEV(0, 0); 345 dev_t devt = MKDEV(0, 0);
342 struct device *ddev = disk_to_dev(disk); 346 struct device *ddev = disk_to_dev(disk);
343 struct device *pdev; 347 struct device *pdev;
348 struct disk_part_tbl *ptbl;
344 const char *dname; 349 const char *dname;
345 int err; 350 int err;
346 351
347 if (disk->__part[partno]) 352 err = disk_expand_part_tbl(disk, partno);
353 if (err)
354 return err;
355 ptbl = disk->part_tbl;
356
357 if (ptbl->part[partno])
348 return -EBUSY; 358 return -EBUSY;
349 359
350 p = kzalloc(sizeof(*p), GFP_KERNEL); 360 p = kzalloc(sizeof(*p), GFP_KERNEL);
@@ -398,7 +408,7 @@ int add_partition(struct gendisk *disk, int partno,
398 408
399 /* everything is up and running, commence */ 409 /* everything is up and running, commence */
400 INIT_RCU_HEAD(&p->rcu_head); 410 INIT_RCU_HEAD(&p->rcu_head);
401 rcu_assign_pointer(disk->__part[partno], p); 411 rcu_assign_pointer(ptbl->part[partno], p);
402 412
403 /* suppress uevent if the disk suppresses it */ 413 /* suppress uevent if the disk suppresses it */
404 if (!ddev->uevent_suppress) 414 if (!ddev->uevent_suppress)
@@ -487,7 +497,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
487 struct disk_part_iter piter; 497 struct disk_part_iter piter;
488 struct hd_struct *part; 498 struct hd_struct *part;
489 struct parsed_partitions *state; 499 struct parsed_partitions *state;
490 int p, res; 500 int p, highest, res;
491 501
492 if (bdev->bd_part_count) 502 if (bdev->bd_part_count)
493 return -EBUSY; 503 return -EBUSY;
@@ -511,6 +521,17 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
511 /* tell userspace that the media / partition table may have changed */ 521 /* tell userspace that the media / partition table may have changed */
512 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 522 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
513 523
524 /* Detect the highest partition number and preallocate
525 * disk->part_tbl. This is an optimization and not strictly
526 * necessary.
527 */
528 for (p = 1, highest = 0; p < state->limit; p++)
529 if (state->parts[p].size)
530 highest = p;
531
532 disk_expand_part_tbl(disk, highest);
533
534 /* add partitions */
514 for (p = 1; p < state->limit; p++) { 535 for (p = 1; p < state->limit; p++) {
515 sector_t size = state->parts[p].size; 536 sector_t size = state->parts[p].size;
516 sector_t from = state->parts[p].from; 537 sector_t from = state->parts[p].from;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c90e1b4fbe5a..ecf649c3deed 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -113,6 +113,21 @@ struct hd_struct {
113#define GENHD_FL_UP 16 113#define GENHD_FL_UP 16
114#define GENHD_FL_SUPPRESS_PARTITION_INFO 32 114#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
115 115
116#define BLK_SCSI_MAX_CMDS (256)
117#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
118
119struct blk_scsi_cmd_filter {
120 unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
121 unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
122 struct kobject kobj;
123};
124
125struct disk_part_tbl {
126 struct rcu_head rcu_head;
127 int len;
128 struct hd_struct *part[];
129};
130
116struct gendisk { 131struct gendisk {
117 /* major, first_minor, minors and ext_minors are input 132 /* major, first_minor, minors and ext_minors are input
118 * parameters only, don't use directly. Use disk_devt() and 133 * parameters only, don't use directly. Use disk_devt() and
@@ -131,7 +146,7 @@ struct gendisk {
131 * non-critical accesses use RCU. Always access through 146 * non-critical accesses use RCU. Always access through
132 * helpers. 147 * helpers.
133 */ 148 */
134 struct hd_struct **__part; 149 struct disk_part_tbl *part_tbl;
135 struct hd_struct part0; 150 struct hd_struct part0;
136 151
137 struct block_device_operations *fops; 152 struct block_device_operations *fops;
@@ -149,6 +164,7 @@ struct gendisk {
149#ifdef CONFIG_BLK_DEV_INTEGRITY 164#ifdef CONFIG_BLK_DEV_INTEGRITY
150 struct blk_integrity *integrity; 165 struct blk_integrity *integrity;
151#endif 166#endif
167 int node_id;
152}; 168};
153 169
154static inline struct gendisk *part_to_disk(struct hd_struct *part) 170static inline struct gendisk *part_to_disk(struct hd_struct *part)
@@ -503,6 +519,7 @@ extern void blk_free_devt(dev_t devt);
503extern dev_t blk_lookup_devt(const char *name, int partno); 519extern dev_t blk_lookup_devt(const char *name, int partno);
504extern char *disk_name (struct gendisk *hd, int partno, char *buf); 520extern char *disk_name (struct gendisk *hd, int partno, char *buf);
505 521
522extern int disk_expand_part_tbl(struct gendisk *disk, int target);
506extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); 523extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
507extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); 524extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int);
508extern void delete_partition(struct gendisk *, int); 525extern void delete_partition(struct gendisk *, int);