aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorNeilBrown <neilb@cse.unsw.edu.au>2005-09-09 19:23:47 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-09 19:39:10 -0400
commit15945fee6f09bff1f86b1a735b5888dc59cf38e3 (patch)
treeed2f66ceccfa30867035e7ba7be46159e97e4e4d /drivers
parent4b6d287f627b5fb6a49f78f9e81649ff98c62bb7 (diff)
[PATCH] md: support md/linear array with components greater than 2 terabytes.
linear currently uses division by the size of the smallest component device to find which device a request goes to. If that smallest device is larger than 2 terabytes, then the division will not work on some systems. So we introduce a pre-shift, and take care not to make the hash table too large, much like the code in raid0. Also get rid of conf->nr_zones, which is not needed. Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/linear.c95
1 files changed, 66 insertions, 29 deletions
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 4991ba54336..bb279fad2fd 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -38,7 +38,8 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
38 /* 38 /*
39 * sector_div(a,b) returns the remainder and sets a to a/b 39 * sector_div(a,b) returns the remainder and sets a to a/b
40 */ 40 */
41 (void)sector_div(block, conf->smallest->size); 41 block >>= conf->preshift;
42 (void)sector_div(block, conf->hash_spacing);
42 hash = conf->hash_table[block]; 43 hash = conf->hash_table[block];
43 44
44 while ((sector>>1) >= (hash->size + hash->offset)) 45 while ((sector>>1) >= (hash->size + hash->offset))
@@ -47,7 +48,7 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
47} 48}
48 49
49/** 50/**
50 * linear_mergeable_bvec -- tell bio layer if a two requests can be merged 51 * linear_mergeable_bvec -- tell bio layer if two requests can be merged
51 * @q: request queue 52 * @q: request queue
52 * @bio: the buffer head that's been built up so far 53 * @bio: the buffer head that's been built up so far
53 * @biovec: the request that could be merged to it. 54 * @biovec: the request that could be merged to it.
@@ -116,7 +117,7 @@ static int linear_run (mddev_t *mddev)
116 dev_info_t **table; 117 dev_info_t **table;
117 mdk_rdev_t *rdev; 118 mdk_rdev_t *rdev;
118 int i, nb_zone, cnt; 119 int i, nb_zone, cnt;
119 sector_t start; 120 sector_t min_spacing;
120 sector_t curr_offset; 121 sector_t curr_offset;
121 struct list_head *tmp; 122 struct list_head *tmp;
122 123
@@ -127,11 +128,6 @@ static int linear_run (mddev_t *mddev)
127 memset(conf, 0, sizeof(*conf) + mddev->raid_disks*sizeof(dev_info_t)); 128 memset(conf, 0, sizeof(*conf) + mddev->raid_disks*sizeof(dev_info_t));
128 mddev->private = conf; 129 mddev->private = conf;
129 130
130 /*
131 * Find the smallest device.
132 */
133
134 conf->smallest = NULL;
135 cnt = 0; 131 cnt = 0;
136 mddev->array_size = 0; 132 mddev->array_size = 0;
137 133
@@ -159,8 +155,6 @@ static int linear_run (mddev_t *mddev)
159 disk->size = rdev->size; 155 disk->size = rdev->size;
160 mddev->array_size += rdev->size; 156 mddev->array_size += rdev->size;
161 157
162 if (!conf->smallest || (disk->size < conf->smallest->size))
163 conf->smallest = disk;
164 cnt++; 158 cnt++;
165 } 159 }
166 if (cnt != mddev->raid_disks) { 160 if (cnt != mddev->raid_disks) {
@@ -168,6 +162,36 @@ static int linear_run (mddev_t *mddev)
168 goto out; 162 goto out;
169 } 163 }
170 164
165 min_spacing = mddev->array_size;
166 sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *));
167
168 /* min_spacing is the minimum spacing that will fit the hash
169 * table in one PAGE. This may be much smaller than needed.
170 * We find the smallest non-terminal set of consecutive devices
171 * that is larger than min_spacing and use the size of that as
172 * the actual spacing
173 */
174 conf->hash_spacing = mddev->array_size;
175 for (i=0; i < cnt-1 ; i++) {
176 sector_t sz = 0;
177 int j;
178 for (j=i; i<cnt-1 && sz < min_spacing ; j++)
179 sz += conf->disks[j].size;
180 if (sz >= min_spacing && sz < conf->hash_spacing)
181 conf->hash_spacing = sz;
182 }
183
184 /* hash_spacing may be too large for sector_div to work with,
185 * so we might need to pre-shift
186 */
187 conf->preshift = 0;
188 if (sizeof(sector_t) > sizeof(u32)) {
189 sector_t space = conf->hash_spacing;
190 while (space > (sector_t)(~(u32)0)) {
191 space >>= 1;
192 conf->preshift++;
193 }
194 }
171 /* 195 /*
172 * This code was restructured to work around a gcc-2.95.3 internal 196 * This code was restructured to work around a gcc-2.95.3 internal
173 * compiler error. Alter it with care. 197 * compiler error. Alter it with care.
@@ -177,39 +201,52 @@ static int linear_run (mddev_t *mddev)
177 unsigned round; 201 unsigned round;
178 unsigned long base; 202 unsigned long base;
179 203
180 sz = mddev->array_size; 204 sz = mddev->array_size >> conf->preshift;
181 base = conf->smallest->size; 205 sz += 1; /* force round-up */
206 base = conf->hash_spacing >> conf->preshift;
182 round = sector_div(sz, base); 207 round = sector_div(sz, base);
183 nb_zone = conf->nr_zones = sz + (round ? 1 : 0); 208 nb_zone = sz + (round ? 1 : 0);
184 } 209 }
185 210 BUG_ON(nb_zone > PAGE_SIZE / sizeof(struct dev_info *));
186 conf->hash_table = kmalloc (sizeof (dev_info_t*) * nb_zone, 211
212 conf->hash_table = kmalloc (sizeof (struct dev_info *) * nb_zone,
187 GFP_KERNEL); 213 GFP_KERNEL);
188 if (!conf->hash_table) 214 if (!conf->hash_table)
189 goto out; 215 goto out;
190 216
191 /* 217 /*
192 * Here we generate the linear hash table 218 * Here we generate the linear hash table
219 * First calculate the device offsets.
193 */ 220 */
221 conf->disks[0].offset = 0;
222 for (i=1; i<mddev->raid_disks; i++)
223 conf->disks[i].offset =
224 conf->disks[i-1].offset +
225 conf->disks[i-1].size;
226
194 table = conf->hash_table; 227 table = conf->hash_table;
195 start = 0;
196 curr_offset = 0; 228 curr_offset = 0;
197 for (i = 0; i < cnt; i++) { 229 i = 0;
198 dev_info_t *disk = conf->disks + i; 230 for (curr_offset = 0;
231 curr_offset < mddev->array_size;
232 curr_offset += conf->hash_spacing) {
199 233
200 disk->offset = curr_offset; 234 while (i < mddev->raid_disks-1 &&
201 curr_offset += disk->size; 235 curr_offset >= conf->disks[i+1].offset)
236 i++;
202 237
203 /* 'curr_offset' is the end of this disk 238 *table ++ = conf->disks + i;
204 * 'start' is the start of table 239 }
240
241 if (conf->preshift) {
242 conf->hash_spacing >>= conf->preshift;
243 /* round hash_spacing up so that when we divide by it,
244 * we err on the side of "too-low", which is safest.
205 */ 245 */
206 while (start < curr_offset) { 246 conf->hash_spacing++;
207 *table++ = disk;
208 start += conf->smallest->size;
209 }
210 } 247 }
211 if (table-conf->hash_table != nb_zone) 248
212 BUG(); 249 BUG_ON(table - conf->hash_table > nb_zone);
213 250
214 blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); 251 blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
215 mddev->queue->unplug_fn = linear_unplug; 252 mddev->queue->unplug_fn = linear_unplug;
@@ -299,7 +336,7 @@ static void linear_status (struct seq_file *seq, mddev_t *mddev)
299 sector_t s = 0; 336 sector_t s = 0;
300 337
301 seq_printf(seq, " "); 338 seq_printf(seq, " ");
302 for (j = 0; j < conf->nr_zones; j++) 339 for (j = 0; j < mddev->raid_disks; j++)
303 { 340 {
304 char b[BDEVNAME_SIZE]; 341 char b[BDEVNAME_SIZE];
305 s += conf->smallest_size; 342 s += conf->smallest_size;