| author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-18 16:11:50 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-18 16:11:50 -0400 |
| commit | 9729a6eb5878a3daa18395f2b5fb38bf9359a761 (patch) | |
| tree | 7ea32e9793c659d4059c49e7da2f38da795e7679 | |
| parent | 5ae8606d5746bc84e19018fc3753cc1faf18843f (diff) | |
| parent | 48606a9f2fc034f0b308d088c1f7ab6d407c462c (diff) | |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (39 commits)
md/raid5: correctly update sync_completed when we reach max_resync
md/raid5: add missing call to schedule() after prepare_to_wait()
md/linear: use call_rcu to free obsolete 'conf' structures.
md linear: Protecting mddev with rcu locks to avoid races
md: Move check for bitmap presence to personality code.
md: remove chunksize rounding from common code.
md: raid0/linear: ensure device sizes are rounded to chunk size.
md: move assignment of ->utime so that it never gets skipped.
md: Push down reconstruction log message to personality code.
md: merge reconfig and check_reshape methods.
md: remove unnecessary arguments from ->reconfig method.
md: raid5: check stripe cache is large enough in start_reshape
md: raid0: chunk_sectors cleanups.
md: fix some comments.
md/raid5: Use is_power_of_2() in raid5_reconfig()/raid6_reconfig().
md: convert conf->chunk_size and conf->prev_chunk to sectors.
md: Convert mddev->new_chunk to sectors.
md: Make mddev->chunk_size sector-based.
md: raid0: Enables chunk size other than powers of 2.
md: prepare for non-power-of-two chunk sizes
...
| -rw-r--r-- | drivers/md/faulty.c | 21 |
| -rw-r--r-- | drivers/md/linear.c | 218 |
| -rw-r--r-- | drivers/md/linear.h | 12 |
| -rw-r--r-- | drivers/md/md.c | 196 |
| -rw-r--r-- | drivers/md/md.h | 14 |
| -rw-r--r-- | drivers/md/multipath.c | 23 |
| -rw-r--r-- | drivers/md/multipath.h | 6 |
| -rw-r--r-- | drivers/md/raid0.c | 403 |
| -rw-r--r-- | drivers/md/raid0.h | 10 |
| -rw-r--r-- | drivers/md/raid1.c | 46 |
| -rw-r--r-- | drivers/md/raid1.h | 6 |
| -rw-r--r-- | drivers/md/raid10.c | 62 |
| -rw-r--r-- | drivers/md/raid10.h | 6 |
| -rw-r--r-- | drivers/md/raid5.c | 218 |
| -rw-r--r-- | drivers/md/raid5.h | 8 |
| -rw-r--r-- | include/linux/raid/md_p.h | 2 |

16 files changed, 588 insertions(+), 663 deletions(-)
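The unifying change across this series is that the byte-based `mddev->chunk_size` becomes the sector-based `mddev->chunk_sectors`, and `new_chunk` likewise becomes `new_chunk_sectors`; every superblock load/sync site in the md.c diff below converts between the two with a shift by 9, since a sector is 512 bytes. A minimal standalone sketch of that convention (the helper names are illustrative, not kernel API):

```c
#include <stdio.h>

/* 1 sector = 512 bytes, so bytes <-> sectors is a shift by 9. */
static int chunk_bytes_to_sectors(int chunk_size)
{
	/* mirrors e.g. mddev->chunk_sectors = sb->chunk_size >> 9 */
	return chunk_size >> 9;
}

static int chunk_sectors_to_bytes(int chunk_sectors)
{
	/* mirrors e.g. sb->chunk_size = mddev->chunk_sectors << 9 */
	return chunk_sectors << 9;
}

int main(void)
{
	int chunk_size = 64 * 1024;	/* a 64k chunk, in bytes */

	printf("%d bytes = %d sectors\n",
	       chunk_size, chunk_bytes_to_sectors(chunk_size));
	printf("128 sectors = %d bytes\n", chunk_sectors_to_bytes(128));
	return 0;
}
```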
```diff
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 8695809b24b0..87d88dbb667f 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -255,14 +255,14 @@ static void status(struct seq_file *seq, mddev_t *mddev)
 }
 
 
-static int reconfig(mddev_t *mddev, int layout, int chunk_size)
+static int reshape(mddev_t *mddev)
 {
-	int mode = layout & ModeMask;
-	int count = layout >> ModeShift;
+	int mode = mddev->new_layout & ModeMask;
+	int count = mddev->new_layout >> ModeShift;
 	conf_t *conf = mddev->private;
 
-	if (chunk_size != -1)
-		return -EINVAL;
+	if (mddev->new_layout < 0)
+		return 0;
 
 	/* new layout */
 	if (mode == ClearFaults)
@@ -279,6 +279,7 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
 		atomic_set(&conf->counters[mode], count);
 	} else
 		return -EINVAL;
+	mddev->new_layout = -1;
 	mddev->layout = -1; /* makes sure further changes come through */
 	return 0;
 }
@@ -298,8 +299,12 @@ static int run(mddev_t *mddev)
 {
 	mdk_rdev_t *rdev;
 	int i;
+	conf_t *conf;
+
+	if (md_check_no_bitmap(mddev))
+		return -EINVAL;
 
-	conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL);
+	conf = kmalloc(sizeof(*conf), GFP_KERNEL);
 	if (!conf)
 		return -ENOMEM;
 
@@ -315,7 +320,7 @@ static int run(mddev_t *mddev)
 	md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
 	mddev->private = conf;
 
-	reconfig(mddev, mddev->layout, -1);
+	reshape(mddev);
 
 	return 0;
 }
@@ -338,7 +343,7 @@ static struct mdk_personality faulty_personality =
 	.run		= run,
 	.stop		= stop,
 	.status		= status,
-	.reconfig	= reconfig,
+	.check_reshape	= reshape,
 	.size		= faulty_size,
 };
 
```
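The faulty.c hunks above show the calling convention that replaces the old three-argument `->reconfig(mddev, layout, chunk_size)` method throughout this series: the core stages a request in `mddev->new_layout` (or `new_chunk_sectors`) and calls the personality's `->check_reshape()`, rolling the staged value back on failure, while the personality resets it to -1 once consumed. A simplified userspace model of that handshake (the struct and personality here are stand-ins, not the real kernel types):

```c
#include <errno.h>
#include <stdio.h>

struct mddev {
	int layout;
	int new_layout;				/* -1 means "no request pending" */
	int (*check_reshape)(struct mddev *mddev);
};

/* Personality side: consume the staged layout, as faulty.c's reshape() does. */
static int fake_reshape(struct mddev *mddev)
{
	if (mddev->new_layout < 0)
		return 0;			/* nothing requested */
	mddev->layout = mddev->new_layout;
	mddev->new_layout = -1;			/* mark the request consumed */
	return 0;
}

/* Core side: stage, call, roll back on error. */
static int layout_store(struct mddev *mddev, int n)
{
	int err;

	if (mddev->check_reshape == NULL)
		return -EBUSY;
	mddev->new_layout = n;
	err = mddev->check_reshape(mddev);
	if (err)
		mddev->new_layout = mddev->layout;
	return err;
}

int main(void)
{
	struct mddev md = { .layout = 0, .new_layout = -1,
			    .check_reshape = fake_reshape };

	layout_store(&md, 5);
	printf("layout now %d\n", md.layout);	/* prints 5 */
	return 0;
}
```

The same stage/call/roll-back shape appears in `layout_store()`, `chunk_size_store()` and `update_array_info()` in the md.c diff below.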
```diff
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 64f1f3e046e0..15c8b7b25a9b 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -27,19 +27,27 @@
  */
 static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
 {
-	dev_info_t *hash;
-	linear_conf_t *conf = mddev_to_conf(mddev);
-	sector_t idx = sector >> conf->sector_shift;
+	int lo, mid, hi;
+	linear_conf_t *conf;
+
+	lo = 0;
+	hi = mddev->raid_disks - 1;
+	conf = rcu_dereference(mddev->private);
 
 	/*
-	 * sector_div(a,b) returns the remainer and sets a to a/b
+	 * Binary Search
 	 */
-	(void)sector_div(idx, conf->spacing);
-	hash = conf->hash_table[idx];
 
-	while (sector >= hash->num_sectors + hash->start_sector)
-		hash++;
-	return hash;
+	while (hi > lo) {
+
+		mid = (hi + lo) / 2;
+		if (sector < conf->disks[mid].end_sector)
+			hi = mid;
+		else
+			lo = mid + 1;
+	}
+
+	return conf->disks + lo;
 }
 
 /**
@@ -59,8 +67,10 @@ static int linear_mergeable_bvec(struct request_queue *q,
 	unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
 	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
 
+	rcu_read_lock();
 	dev0 = which_dev(mddev, sector);
-	maxsectors = dev0->num_sectors - (sector - dev0->start_sector);
+	maxsectors = dev0->end_sector - sector;
+	rcu_read_unlock();
 
 	if (maxsectors < bio_sectors)
 		maxsectors = 0;
@@ -79,46 +89,57 @@ static int linear_mergeable_bvec(struct request_queue *q,
 static void linear_unplug(struct request_queue *q)
 {
 	mddev_t *mddev = q->queuedata;
-	linear_conf_t *conf = mddev_to_conf(mddev);
+	linear_conf_t *conf;
 	int i;
 
+	rcu_read_lock();
+	conf = rcu_dereference(mddev->private);
+
 	for (i=0; i < mddev->raid_disks; i++) {
 		struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
 		blk_unplug(r_queue);
 	}
+	rcu_read_unlock();
 }
 
 static int linear_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
-	linear_conf_t *conf = mddev_to_conf(mddev);
+	linear_conf_t *conf;
 	int i, ret = 0;
 
+	rcu_read_lock();
+	conf = rcu_dereference(mddev->private);
+
 	for (i = 0; i < mddev->raid_disks && !ret ; i++) {
 		struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
 		ret |= bdi_congested(&q->backing_dev_info, bits);
 	}
+
+	rcu_read_unlock();
 	return ret;
 }
 
 static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks)
 {
-	linear_conf_t *conf = mddev_to_conf(mddev);
+	linear_conf_t *conf;
+	sector_t array_sectors;
 
+	rcu_read_lock();
+	conf = rcu_dereference(mddev->private);
 	WARN_ONCE(sectors || raid_disks,
 		  "%s does not support generic reshape\n", __func__);
+	array_sectors = conf->array_sectors;
+	rcu_read_unlock();
 
-	return conf->array_sectors;
+	return array_sectors;
 }
 
 static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 {
 	linear_conf_t *conf;
-	dev_info_t **table;
 	mdk_rdev_t *rdev;
-	int i, nb_zone, cnt;
-	sector_t min_sectors;
-	sector_t curr_sector;
+	int i, cnt;
 
 	conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t),
 			GFP_KERNEL);
@@ -131,6 +152,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
 		int j = rdev->raid_disk;
 		dev_info_t *disk = conf->disks + j;
+		sector_t sectors;
 
 		if (j < 0 || j >= raid_disks || disk->rdev) {
 			printk("linear: disk numbering problem. Aborting!\n");
@@ -138,6 +160,11 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		}
 
 		disk->rdev = rdev;
+		if (mddev->chunk_sectors) {
+			sectors = rdev->sectors;
+			sector_div(sectors, mddev->chunk_sectors);
+			rdev->sectors = sectors * mddev->chunk_sectors;
+		}
 
 		blk_queue_stack_limits(mddev->queue,
 				       rdev->bdev->bd_disk->queue);
@@ -149,102 +176,24 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
-		disk->num_sectors = rdev->sectors;
 		conf->array_sectors += rdev->sectors;
-
 		cnt++;
+
 	}
 	if (cnt != raid_disks) {
 		printk("linear: not enough drives present. Aborting!\n");
 		goto out;
 	}
 
-	min_sectors = conf->array_sectors;
-	sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *));
-	if (min_sectors == 0)
-		min_sectors = 1;
-
-	/* min_sectors is the minimum spacing that will fit the hash
-	 * table in one PAGE. This may be much smaller than needed.
-	 * We find the smallest non-terminal set of consecutive devices
-	 * that is larger than min_sectors and use the size of that as
-	 * the actual spacing
-	 */
-	conf->spacing = conf->array_sectors;
-	for (i=0; i < cnt-1 ; i++) {
-		sector_t tmp = 0;
-		int j;
-		for (j = i; j < cnt - 1 && tmp < min_sectors; j++)
-			tmp += conf->disks[j].num_sectors;
-		if (tmp >= min_sectors && tmp < conf->spacing)
-			conf->spacing = tmp;
-	}
-
-	/* spacing may be too large for sector_div to work with,
-	 * so we might need to pre-shift
-	 */
-	conf->sector_shift = 0;
-	if (sizeof(sector_t) > sizeof(u32)) {
-		sector_t space = conf->spacing;
-		while (space > (sector_t)(~(u32)0)) {
-			space >>= 1;
-			conf->sector_shift++;
-		}
-	}
 	/*
-	 * This code was restructured to work around a gcc-2.95.3 internal
-	 * compiler error.  Alter it with care.
+	 * Here we calculate the device offsets.
 	 */
-	{
-		sector_t sz;
-		unsigned round;
-		unsigned long base;
-
-		sz = conf->array_sectors >> conf->sector_shift;
-		sz += 1; /* force round-up */
-		base = conf->spacing >> conf->sector_shift;
-		round = sector_div(sz, base);
-		nb_zone = sz + (round ? 1 : 0);
-	}
-	BUG_ON(nb_zone > PAGE_SIZE / sizeof(struct dev_info *));
-
-	conf->hash_table = kmalloc (sizeof (struct dev_info *) * nb_zone,
-					GFP_KERNEL);
-	if (!conf->hash_table)
-		goto out;
+	conf->disks[0].end_sector = conf->disks[0].rdev->sectors;
 
-	/*
-	 * Here we generate the linear hash table
-	 * First calculate the device offsets.
-	 */
-	conf->disks[0].start_sector = 0;
 	for (i = 1; i < raid_disks; i++)
-		conf->disks[i].start_sector =
-			conf->disks[i-1].start_sector +
-			conf->disks[i-1].num_sectors;
-
-	table = conf->hash_table;
-	i = 0;
-	for (curr_sector = 0;
-	     curr_sector < conf->array_sectors;
-	     curr_sector += conf->spacing) {
-
-		while (i < raid_disks-1 &&
-		       curr_sector >= conf->disks[i+1].start_sector)
-			i++;
-
-		*table ++ = conf->disks + i;
-	}
-
-	if (conf->sector_shift) {
-		conf->spacing >>= conf->sector_shift;
-		/* round spacing up so that when we divide by it,
-		 * we err on the side of "too-low", which is safest.
-		 */
-		conf->spacing++;
-	}
-
-	BUG_ON(table - conf->hash_table > nb_zone);
+		conf->disks[i].end_sector =
+			conf->disks[i-1].end_sector +
+			conf->disks[i].rdev->sectors;
 
 	return conf;
 
@@ -257,6 +206,8 @@ static int linear_run (mddev_t *mddev)
 {
 	linear_conf_t *conf;
 
+	if (md_check_no_bitmap(mddev))
+		return -EINVAL;
 	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
 	conf = linear_conf(mddev, mddev->raid_disks);
 
@@ -272,6 +223,12 @@ static int linear_run (mddev_t *mddev)
 	return 0;
 }
 
+static void free_conf(struct rcu_head *head)
+{
+	linear_conf_t *conf = container_of(head, linear_conf_t, rcu);
+	kfree(conf);
+}
+
 static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
 {
 	/* Adding a drive to a linear array allows the array to grow.
@@ -282,7 +239,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
 	 * The current one is never freed until the array is stopped.
 	 * This avoids races.
 	 */
-	linear_conf_t *newconf;
+	linear_conf_t *newconf, *oldconf;
 
 	if (rdev->saved_raid_disk != mddev->raid_disks)
 		return -EINVAL;
@@ -294,25 +251,29 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
 	if (!newconf)
 		return -ENOMEM;
 
-	newconf->prev = mddev_to_conf(mddev);
-	mddev->private = newconf;
+	oldconf = rcu_dereference(mddev->private);
 	mddev->raid_disks++;
+	rcu_assign_pointer(mddev->private, newconf);
 	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 	set_capacity(mddev->gendisk, mddev->array_sectors);
+	call_rcu(&oldconf->rcu, free_conf);
 	return 0;
 }
 
 static int linear_stop (mddev_t *mddev)
 {
-	linear_conf_t *conf = mddev_to_conf(mddev);
+	linear_conf_t *conf = mddev->private;
 
+	/*
+	 * We do not require rcu protection here since
+	 * we hold reconfig_mutex for both linear_add and
+	 * linear_stop, so they cannot race.
+	 * We should make sure any old 'conf's are properly
+	 * freed though.
+	 */
+	rcu_barrier();
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
-	do {
-		linear_conf_t *t = conf->prev;
-		kfree(conf->hash_table);
-		kfree(conf);
-		conf = t;
-	} while (conf);
+	kfree(conf);
 
 	return 0;
 }
@@ -322,6 +283,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 	const int rw = bio_data_dir(bio);
 	mddev_t *mddev = q->queuedata;
 	dev_info_t *tmp_dev;
+	sector_t start_sector;
 	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
@@ -335,33 +297,36 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 		      bio_sectors(bio));
 	part_stat_unlock();
 
+	rcu_read_lock();
 	tmp_dev = which_dev(mddev, bio->bi_sector);
-
-	if (unlikely(bio->bi_sector >= (tmp_dev->num_sectors +
-					tmp_dev->start_sector)
-		     || (bio->bi_sector <
-			 tmp_dev->start_sector))) {
+	start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
+
+
+	if (unlikely(bio->bi_sector >= (tmp_dev->end_sector)
+		     || (bio->bi_sector < start_sector))) {
 		char b[BDEVNAME_SIZE];
 
 		printk("linear_make_request: Sector %llu out of bounds on "
 			"dev %s: %llu sectors, offset %llu\n",
 			(unsigned long long)bio->bi_sector,
 			bdevname(tmp_dev->rdev->bdev, b),
-			(unsigned long long)tmp_dev->num_sectors,
-			(unsigned long long)tmp_dev->start_sector);
+			(unsigned long long)tmp_dev->rdev->sectors,
+			(unsigned long long)start_sector);
+		rcu_read_unlock();
 		bio_io_error(bio);
 		return 0;
 	}
 	if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
-		     tmp_dev->start_sector + tmp_dev->num_sectors)) {
+		     tmp_dev->end_sector)) {
 		/* This bio crosses a device boundary, so we have to
 		 * split it.
 		 */
 		struct bio_pair *bp;
+		sector_t end_sector = tmp_dev->end_sector;
+
+		rcu_read_unlock();
 
-		bp = bio_split(bio,
-			       tmp_dev->start_sector + tmp_dev->num_sectors
-			       - bio->bi_sector);
+		bp = bio_split(bio, end_sector - bio->bi_sector);
 
 		if (linear_make_request(q, &bp->bio1))
 			generic_make_request(&bp->bio1);
@@ -372,8 +337,9 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 	}
 
 	bio->bi_bdev = tmp_dev->rdev->bdev;
-	bio->bi_sector = bio->bi_sector - tmp_dev->start_sector
+	bio->bi_sector = bio->bi_sector - start_sector
 		+ tmp_dev->rdev->data_offset;
+	rcu_read_unlock();
 
 	return 1;
 }
@@ -381,7 +347,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 static void linear_status (struct seq_file *seq, mddev_t *mddev)
 {
 
-	seq_printf(seq, "  %dk rounding", mddev->chunk_size/1024);
+	seq_printf(seq, "  %dk rounding", mddev->chunk_sectors / 2);
 }
 
 
```
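The `which_dev()` rewrite above drops linear's per-PAGE hash table in favor of a binary search: each member now records only its cumulative `end_sector`, its start is recovered as `end_sector - rdev->sectors`, and the search returns the first member whose end lies strictly beyond the target sector. A standalone sketch of that lower-bound search (with types simplified from the kernel's):

```c
#include <stdio.h>

typedef unsigned long long sector_t;

struct dev_info {
	sector_t end_sector;	/* cumulative end offset of this member */
};

/* Find the first member whose end_sector is beyond the target sector,
 * mirroring the loop in the new which_dev(). */
static struct dev_info *which_dev(struct dev_info *disks, int raid_disks,
				  sector_t sector)
{
	int lo = 0, hi = raid_disks - 1;

	while (hi > lo) {
		int mid = (hi + lo) / 2;

		if (sector < disks[mid].end_sector)
			hi = mid;
		else
			lo = mid + 1;
	}
	return disks + lo;
}

int main(void)
{
	/* three members of 100, 50 and 200 sectors */
	struct dev_info disks[] = { { 100 }, { 150 }, { 350 } };
	struct dev_info *d = which_dev(disks, 3, 120);

	/* sector 120 falls in the second member, which ends at 150 */
	printf("end_sector = %llu\n", d->end_sector);
	return 0;
}
```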
```diff
diff --git a/drivers/md/linear.h b/drivers/md/linear.h
index bf8179587f95..0ce29b61605a 100644
--- a/drivers/md/linear.h
+++ b/drivers/md/linear.h
@@ -3,27 +3,19 @@
 
 struct dev_info {
 	mdk_rdev_t	*rdev;
-	sector_t	num_sectors;
-	sector_t	start_sector;
+	sector_t	end_sector;
 };
 
 typedef struct dev_info dev_info_t;
 
 struct linear_private_data
 {
-	struct linear_private_data *prev;	/* earlier version */
-	dev_info_t		**hash_table;
-	sector_t		spacing;
 	sector_t		array_sectors;
-	int			sector_shift;	/* shift before dividing
-						 * by spacing
-						 */
 	dev_info_t		disks[0];
+	struct rcu_head		rcu;
};
 
 
 typedef struct linear_private_data linear_conf_t;
 
-#define mddev_to_conf(mddev) ((linear_conf_t *) mddev->private)
-
 #endif
```
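The `struct rcu_head rcu` added here is what lets linear.c drop the old `prev` chain of stale configurations: readers pin the current conf with `rcu_read_lock()`/`rcu_dereference()`, `linear_add()` publishes a replacement with `rcu_assign_pointer()` and retires the old copy via `call_rcu()`, and `linear_stop()` issues `rcu_barrier()` so every pending `free_conf()` callback has run before the final `kfree()`. A condensed kernel-style sketch of the pattern, assuming the types from this patch (not a complete driver):

```c
#include <linux/rcupdate.h>
#include <linux/slab.h>

static void free_conf(struct rcu_head *head)
{
	kfree(container_of(head, linear_conf_t, rcu));
}

/* Reader side: pin the current conf for the duration of the access. */
static sector_t read_array_sectors(mddev_t *mddev)
{
	linear_conf_t *conf;
	sector_t ret;

	rcu_read_lock();
	conf = rcu_dereference(mddev->private);
	ret = conf->array_sectors;
	rcu_read_unlock();
	return ret;
}

/* Updater side (serialized by reconfig_mutex in md): publish the
 * replacement, then retire the old copy once every reader that could
 * still see it has left its RCU read-side critical section. */
static void swap_conf(mddev_t *mddev, linear_conf_t *newconf)
{
	linear_conf_t *oldconf = rcu_dereference(mddev->private);

	rcu_assign_pointer(mddev->private, newconf);
	call_rcu(&oldconf->rcu, free_conf);
}
```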
```diff
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 20f6ac338349..09be637d52cb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -440,15 +440,6 @@ static inline sector_t calc_dev_sboffset(struct block_device *bdev)
 	return MD_NEW_SIZE_SECTORS(num_sectors);
 }
 
-static sector_t calc_num_sectors(mdk_rdev_t *rdev, unsigned chunk_size)
-{
-	sector_t num_sectors = rdev->sb_start;
-
-	if (chunk_size)
-		num_sectors &= ~((sector_t)chunk_size/512 - 1);
-	return num_sectors;
-}
-
 static int alloc_disk_sb(mdk_rdev_t * rdev)
 {
 	if (rdev->sb_page)
@@ -745,6 +736,24 @@ struct super_type {
 };
 
 /*
+ * Check that the given mddev has no bitmap.
+ *
+ * This function is called from the run method of all personalities that do not
+ * support bitmaps. It prints an error message and returns non-zero if mddev
+ * has a bitmap. Otherwise, it returns 0.
+ *
+ */
+int md_check_no_bitmap(mddev_t *mddev)
+{
+	if (!mddev->bitmap_file && !mddev->bitmap_offset)
+		return 0;
+	printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
+	       mdname(mddev), mddev->pers->name);
+	return 1;
+}
+EXPORT_SYMBOL(md_check_no_bitmap);
+
+/*
  * load_super for 0.90.0
  */
 static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
@@ -797,17 +806,6 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
 	rdev->data_offset = 0;
 	rdev->sb_size = MD_SB_BYTES;
 
-	if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {
-		if (sb->level != 1 && sb->level != 4
-		    && sb->level != 5 && sb->level != 6
-		    && sb->level != 10) {
-			/* FIXME use a better test */
-			printk(KERN_WARNING
-			       "md: bitmaps not supported for this level.\n");
-			goto abort;
-		}
-	}
-
 	if (sb->level == LEVEL_MULTIPATH)
 		rdev->desc_nr = -1;
 	else
@@ -836,7 +834,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
 		else
 			ret = 0;
 	}
-	rdev->sectors = calc_num_sectors(rdev, sb->chunk_size);
+	rdev->sectors = rdev->sb_start;
 
 	if (rdev->sectors < sb->size * 2 && sb->level > 1)
 		/* "this cannot possibly happen" ... */
@@ -866,7 +864,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->minor_version = sb->minor_version;
 		mddev->patch_version = sb->patch_version;
 		mddev->external = 0;
-		mddev->chunk_size = sb->chunk_size;
+		mddev->chunk_sectors = sb->chunk_size >> 9;
 		mddev->ctime = sb->ctime;
 		mddev->utime = sb->utime;
 		mddev->level = sb->level;
@@ -883,13 +881,13 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 			mddev->delta_disks = sb->delta_disks;
 			mddev->new_level = sb->new_level;
 			mddev->new_layout = sb->new_layout;
-			mddev->new_chunk = sb->new_chunk;
+			mddev->new_chunk_sectors = sb->new_chunk >> 9;
 		} else {
 			mddev->reshape_position = MaxSector;
 			mddev->delta_disks = 0;
 			mddev->new_level = mddev->level;
 			mddev->new_layout = mddev->layout;
-			mddev->new_chunk = mddev->chunk_size;
+			mddev->new_chunk_sectors = mddev->chunk_sectors;
 		}
 
 		if (sb->state & (1<<MD_SB_CLEAN))
@@ -1004,7 +1002,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 		sb->new_level = mddev->new_level;
 		sb->delta_disks = mddev->delta_disks;
 		sb->new_layout = mddev->new_layout;
-		sb->new_chunk = mddev->new_chunk;
+		sb->new_chunk = mddev->new_chunk_sectors << 9;
 	}
 	mddev->minor_version = sb->minor_version;
 	if (mddev->in_sync)
@@ -1018,7 +1016,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 		sb->recovery_cp = 0;
 
 	sb->layout = mddev->layout;
-	sb->chunk_size = mddev->chunk_size;
+	sb->chunk_size = mddev->chunk_sectors << 9;
 
 	if (mddev->bitmap && mddev->bitmap_file == NULL)
 		sb->state |= (1<<MD_SB_BITMAP_PRESENT);
@@ -1185,17 +1183,6 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 			bdevname(rdev->bdev,b));
 		return -EINVAL;
 	}
-	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) {
-		if (sb->level != cpu_to_le32(1) &&
-		    sb->level != cpu_to_le32(4) &&
-		    sb->level != cpu_to_le32(5) &&
-		    sb->level != cpu_to_le32(6) &&
-		    sb->level != cpu_to_le32(10)) {
-			printk(KERN_WARNING
-			       "md: bitmaps not supported for this level.\n");
-			return -EINVAL;
-		}
-	}
 
 	rdev->preferred_minor = 0xffff;
 	rdev->data_offset = le64_to_cpu(sb->data_offset);
@@ -1248,9 +1235,6 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 	if (rdev->sectors < le64_to_cpu(sb->data_size))
 		return -EINVAL;
 	rdev->sectors = le64_to_cpu(sb->data_size);
-	if (le32_to_cpu(sb->chunksize))
-		rdev->sectors &= ~((sector_t)le32_to_cpu(sb->chunksize) - 1);
-
 	if (le64_to_cpu(sb->size) > rdev->sectors)
 		return -EINVAL;
 	return ret;
@@ -1271,7 +1255,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->major_version = 1;
 		mddev->patch_version = 0;
 		mddev->external = 0;
-		mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
+		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
 		mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
 		mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
 		mddev->level = le32_to_cpu(sb->level);
@@ -1297,13 +1281,13 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
 			mddev->new_level = le32_to_cpu(sb->new_level);
 			mddev->new_layout = le32_to_cpu(sb->new_layout);
-			mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9;
+			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
 		} else {
 			mddev->reshape_position = MaxSector;
 			mddev->delta_disks = 0;
 			mddev->new_level = mddev->level;
 			mddev->new_layout = mddev->layout;
-			mddev->new_chunk = mddev->chunk_size;
+			mddev->new_chunk_sectors = mddev->chunk_sectors;
 		}
 
 	} else if (mddev->pers == NULL) {
@@ -1375,7 +1359,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
 	sb->size = cpu_to_le64(mddev->dev_sectors);
-	sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9);
+	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
 	sb->level = cpu_to_le32(mddev->level);
 	sb->layout = cpu_to_le32(mddev->layout);
 
@@ -1402,7 +1386,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 		sb->new_layout = cpu_to_le32(mddev->new_layout);
 		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
 		sb->new_level = cpu_to_le32(mddev->new_level);
-		sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9);
+		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
 	}
 
 	max_dev = 0;
@@ -1897,6 +1881,7 @@ static void md_update_sb(mddev_t * mddev, int force_change)
 	int sync_req;
 	int nospares = 0;
 
+	mddev->utime = get_seconds();
 	if (mddev->external)
 		return;
 repeat:
@@ -1926,7 +1911,6 @@ repeat:
 		nospares = 0;
 
 	sync_req = mddev->in_sync;
-	mddev->utime = get_seconds();
 
 	/* If this is just a dirty<->clean transition, and the array is clean
 	 * and 'events' is odd, we can roll back to the previous clean state */
@@ -2597,15 +2581,6 @@ static void analyze_sbs(mddev_t * mddev)
 			clear_bit(In_sync, &rdev->flags);
 		}
 	}
-
-
-
-	if (mddev->recovery_cp != MaxSector &&
-	    mddev->level >= 1)
-		printk(KERN_ERR "md: %s: raid array is not clean"
-		       " -- starting background reconstruction\n",
-		       mdname(mddev));
-
 }
 
 static void md_safemode_timeout(unsigned long data);
@@ -2746,7 +2721,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 	if (IS_ERR(priv)) {
 		mddev->new_level = mddev->level;
 		mddev->new_layout = mddev->layout;
-		mddev->new_chunk = mddev->chunk_size;
+		mddev->new_chunk_sectors = mddev->chunk_sectors;
 		mddev->raid_disks -= mddev->delta_disks;
 		mddev->delta_disks = 0;
 		module_put(pers->owner);
@@ -2764,7 +2739,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 	mddev->level = mddev->new_level;
 	mddev->layout = mddev->new_layout;
-	mddev->chunk_size = mddev->new_chunk;
+	mddev->chunk_sectors = mddev->new_chunk_sectors;
 	mddev->delta_disks = 0;
 	pers->run(mddev);
 	mddev_resume(mddev);
@@ -2800,11 +2775,14 @@ layout_store(mddev_t *mddev, const char *buf, size_t len)
 
 	if (mddev->pers) {
 		int err;
-		if (mddev->pers->reconfig == NULL)
+		if (mddev->pers->check_reshape == NULL)
 			return -EBUSY;
-		err = mddev->pers->reconfig(mddev, n, -1);
-		if (err)
+		mddev->new_layout = n;
+		err = mddev->pers->check_reshape(mddev);
+		if (err) {
+			mddev->new_layout = mddev->layout;
 			return err;
+		}
 	} else {
 		mddev->new_layout = n;
 		if (mddev->reshape_position == MaxSector)
@@ -2857,10 +2835,11 @@ static ssize_t
 chunk_size_show(mddev_t *mddev, char *page)
 {
 	if (mddev->reshape_position != MaxSector &&
-	    mddev->chunk_size != mddev->new_chunk)
-		return sprintf(page, "%d (%d)\n", mddev->new_chunk,
-			       mddev->chunk_size);
-	return sprintf(page, "%d\n", mddev->chunk_size);
+	    mddev->chunk_sectors != mddev->new_chunk_sectors)
+		return sprintf(page, "%d (%d)\n",
+			       mddev->new_chunk_sectors << 9,
+			       mddev->chunk_sectors << 9);
+	return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
 }
 
 static ssize_t
@@ -2874,15 +2853,18 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
 
 	if (mddev->pers) {
 		int err;
-		if (mddev->pers->reconfig == NULL)
+		if (mddev->pers->check_reshape == NULL)
 			return -EBUSY;
-		err = mddev->pers->reconfig(mddev, -1, n);
-		if (err)
+		mddev->new_chunk_sectors = n >> 9;
+		err = mddev->pers->check_reshape(mddev);
+		if (err) {
+			mddev->new_chunk_sectors = mddev->chunk_sectors;
 			return err;
+		}
 	} else {
-		mddev->new_chunk = n;
+		mddev->new_chunk_sectors = n >> 9;
 		if (mddev->reshape_position == MaxSector)
-			mddev->chunk_size = n;
+			mddev->chunk_sectors = n >> 9;
 	}
 	return len;
 }
@@ -3527,8 +3509,9 @@ min_sync_store(mddev_t *mddev, const char *buf, size_t len)
 		return -EBUSY;
 
 	/* Must be a multiple of chunk_size */
-	if (mddev->chunk_size) {
-		if (min & (sector_t)((mddev->chunk_size>>9)-1))
+	if (mddev->chunk_sectors) {
+		sector_t temp = min;
+		if (sector_div(temp, mddev->chunk_sectors))
 			return -EINVAL;
 	}
 	mddev->resync_min = min;
@@ -3564,8 +3547,9 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
 		return -EBUSY;
 
 	/* Must be a multiple of chunk_size */
-	if (mddev->chunk_size) {
-		if (max & (sector_t)((mddev->chunk_size>>9)-1))
+	if (mddev->chunk_sectors) {
+		sector_t temp = max;
+		if (sector_div(temp, mddev->chunk_sectors))
 			return -EINVAL;
 	}
 	mddev->resync_max = max;
@@ -3656,7 +3640,7 @@ reshape_position_store(mddev_t *mddev, const char *buf, size_t len)
 	mddev->delta_disks = 0;
 	mddev->new_level = mddev->level;
 	mddev->new_layout = mddev->layout;
-	mddev->new_chunk = mddev->chunk_size;
+	mddev->new_chunk_sectors = mddev->chunk_sectors;
 	return len;
 }
 
@@ -3976,11 +3960,9 @@ static int start_dirty_degraded;
 static int do_md_run(mddev_t * mddev)
 {
 	int err;
-	int chunk_size;
 	mdk_rdev_t *rdev;
 	struct gendisk *disk;
 	struct mdk_personality *pers;
-	char b[BDEVNAME_SIZE];
 
 	if (list_empty(&mddev->disks))
 		/* cannot run an array with no devices.. */
@@ -3998,38 +3980,6 @@ static int do_md_run(mddev_t * mddev)
 		analyze_sbs(mddev);
 	}
 
-	chunk_size = mddev->chunk_size;
-
-	if (chunk_size) {
-		if (chunk_size > MAX_CHUNK_SIZE) {
-			printk(KERN_ERR "too big chunk_size: %d > %d\n",
-				chunk_size, MAX_CHUNK_SIZE);
-			return -EINVAL;
-		}
-		/*
-		 * chunk-size has to be a power of 2
-		 */
-		if ( (1 << ffz(~chunk_size)) != chunk_size) {
-			printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size);
-			return -EINVAL;
-		}
-
-		/* devices must have minimum size of one chunk */
-		list_for_each_entry(rdev, &mddev->disks, same_set) {
-			if (test_bit(Faulty, &rdev->flags))
-				continue;
-			if (rdev->sectors < chunk_size / 512) {
-				printk(KERN_WARNING
-					"md: Dev %s smaller than chunk_size:"
-					" %llu < %d\n",
-					bdevname(rdev->bdev,b),
-					(unsigned long long)rdev->sectors,
-					chunk_size / 512);
-				return -EINVAL;
-			}
-		}
-	}
-
 	if (mddev->level != LEVEL_NONE)
 		request_module("md-level-%d", mddev->level);
 	else if (mddev->clevel[0])
@@ -4405,7 +4355,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 		mddev->flags = 0;
 		mddev->ro = 0;
 		mddev->metadata_type[0] = 0;
-		mddev->chunk_size = 0;
+		mddev->chunk_sectors = 0;
 		mddev->ctime = mddev->utime = 0;
 		mddev->layout = 0;
 		mddev->max_disks = 0;
@@ -4413,7 +4363,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 		mddev->delta_disks = 0;
 		mddev->new_level = LEVEL_NONE;
 		mddev->new_layout = 0;
-		mddev->new_chunk = 0;
+		mddev->new_chunk_sectors = 0;
 		mddev->curr_resync = 0;
 		mddev->resync_mismatches = 0;
 		mddev->suspend_lo = mddev->suspend_hi = 0;
@@ -4618,7 +4568,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
 	info.spare_disks   = spare;
 
 	info.layout        = mddev->layout;
-	info.chunk_size    = mddev->chunk_size;
+	info.chunk_size    = mddev->chunk_sectors << 9;
 
 	if (copy_to_user(arg, &info, sizeof(info)))
 		return -EFAULT;
@@ -4843,7 +4793,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 			rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
 		} else
 			rdev->sb_start = calc_dev_sboffset(rdev->bdev);
-		rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
+		rdev->sectors = rdev->sb_start;
 
 		err = bind_rdev_to_array(rdev, mddev);
 		if (err) {
@@ -4913,7 +4863,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
 	else
 		rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
 
-	rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
+	rdev->sectors = rdev->sb_start;
 
 	if (test_bit(Faulty, &rdev->flags)) {
 		printk(KERN_WARNING
@@ -5062,7 +5012,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 	mddev->external	     = 0;
 
 	mddev->layout        = info->layout;
-	mddev->chunk_size    = info->chunk_size;
+	mddev->chunk_sectors = info->chunk_size >> 9;
 
 	mddev->max_disks     = MD_SB_DISKS;
 
@@ -5081,7 +5031,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 	get_random_bytes(mddev->uuid, 16);
 
 	mddev->new_level = mddev->level;
-	mddev->new_chunk = mddev->chunk_size;
+	mddev->new_chunk_sectors = mddev->chunk_sectors;
 	mddev->new_layout = mddev->layout;
 	mddev->delta_disks = 0;
 
@@ -5191,7 +5141,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
 	    mddev->level         != info->level ||
 /*	    mddev->layout        != info->layout || */
 	    !mddev->persistent	 != info->not_persistent||
-	    mddev->chunk_size    != info->chunk_size ||
+	    mddev->chunk_sectors != info->chunk_size >> 9 ||
 	    /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
 	    ((state^info->state) & 0xfffffe00)
 		)
@@ -5215,10 +5165,15 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
 		 * we don't need to do anything at the md level, the
 		 * personality will take care of it all.
 		 */
-		if (mddev->pers->reconfig == NULL)
+		if (mddev->pers->check_reshape == NULL)
 			return -EINVAL;
-		else
-			return mddev->pers->reconfig(mddev, info->layout, -1);
+		else {
+			mddev->new_layout = info->layout;
+			rv = mddev->pers->check_reshape(mddev);
+			if (rv)
+				mddev->new_layout = mddev->layout;
+			return rv;
+		}
 	}
 	if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
 		rv = update_size(mddev, (sector_t)info->size * 2);
@@ -6717,7 +6672,8 @@ void md_check_recovery(mddev_t *mddev)
 		 */
 
 		if (mddev->reshape_position != MaxSector) {
-			if (mddev->pers->check_reshape(mddev) != 0)
+			if (mddev->pers->check_reshape == NULL ||
+			    mddev->pers->check_reshape(mddev) != 0)
 				/* Cannot proceed */
 				goto unlock;
 			set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
```
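Several md.c hunks above replace mask tests such as `min & ((mddev->chunk_size>>9)-1)` with a `sector_div()` remainder check; the mask form is only correct when the chunk size is a power of two, which this series stops requiring. A small userspace illustration (`sector_div` here is a stand-in for the kernel macro, which divides its first argument in place and returns the remainder):

```c
#include <stdio.h>

typedef unsigned long long sector_t;

/* Stand-in for the kernel's sector_div(): divide in place, return remainder. */
static unsigned int sector_div(sector_t *a, unsigned int b)
{
	unsigned int rem = *a % b;

	*a /= b;
	return rem;
}

static int is_chunk_multiple(sector_t v, unsigned int chunk_sectors)
{
	sector_t temp = v;

	return sector_div(&temp, chunk_sectors) == 0;
}

int main(void)
{
	/* with a 24-sector chunk, the old mask test (v & (24-1)) would
	 * be wrong, because 24 is not a power of two */
	printf("%d\n", is_chunk_multiple(48, 24));	/* 1 */
	printf("%d\n", is_chunk_multiple(50, 24));	/* 0 */
	return 0;
}
```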
```diff
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 8227ab909d44..9430a110db93 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -30,13 +30,6 @@ typedef struct mddev_s mddev_t;
 typedef struct mdk_rdev_s mdk_rdev_t;
 
 /*
- * options passed in raidrun:
- */
-
-/* Currently this must fit in an 'int' */
-#define MAX_CHUNK_SIZE (1<<30)
-
-/*
  * MD's 'extended' device
  */
 struct mdk_rdev_s
@@ -145,7 +138,7 @@ struct mddev_s
 	int				external;	/* metadata is
 							 * managed externally */
 	char				metadata_type[17]; /* externally set*/
-	int				chunk_size;
+	int				chunk_sectors;
 	time_t				ctime, utime;
 	int				level, layout;
 	char				clevel[16];
@@ -166,7 +159,8 @@ struct mddev_s
 	 * If reshape_position is MaxSector, then no reshape is happening (yet).
 	 */
 	sector_t			reshape_position;
-	int				delta_disks, new_level, new_layout, new_chunk;
+	int				delta_disks, new_level, new_layout;
+	int				new_chunk_sectors;
 
 	struct mdk_thread_s		*thread;	/* management thread */
 	struct mdk_thread_s		*sync_thread;	/* doing resync or reconstruct */
@@ -325,7 +319,6 @@ struct mdk_personality
 	int (*check_reshape) (mddev_t *mddev);
 	int (*start_reshape) (mddev_t *mddev);
 	void (*finish_reshape) (mddev_t *mddev);
-	int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
 	/* quiesce moves between quiescence states
 	 * 0 - fully active
 	 * 1 - no new requests allowed
@@ -437,5 +430,6 @@ extern void md_new_event(mddev_t *mddev);
 extern int md_allow_write(mddev_t *mddev);
 extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
 extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
+extern int md_check_no_bitmap(mddev_t *mddev);
 
 #endif /* _MD_MD_H */
```
```diff
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 4ee31aa13c40..cbe368fa6598 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -58,7 +58,7 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
 {
 	unsigned long flags;
 	mddev_t *mddev = mp_bh->mddev;
-	multipath_conf_t *conf = mddev_to_conf(mddev);
+	multipath_conf_t *conf = mddev->private;
 
 	spin_lock_irqsave(&conf->device_lock, flags);
 	list_add(&mp_bh->retry_list, &conf->retry_list);
@@ -75,7 +75,7 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
 static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err)
 {
 	struct bio *bio = mp_bh->master_bio;
-	multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev);
+	multipath_conf_t *conf = mp_bh->mddev->private;
 
 	bio_endio(bio, err);
 	mempool_free(mp_bh, conf->pool);
@@ -85,7 +85,7 @@ static void multipath_end_request(struct bio *bio, int error)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct multipath_bh * mp_bh = (struct multipath_bh *)(bio->bi_private);
-	multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev);
+	multipath_conf_t *conf = mp_bh->mddev->private;
 	mdk_rdev_t *rdev = conf->multipaths[mp_bh->path].rdev;
 
 	if (uptodate)
@@ -107,7 +107,7 @@ static void multipath_end_request(struct bio *bio, int error)
 
 static void unplug_slaves(mddev_t *mddev)
 {
-	multipath_conf_t *conf = mddev_to_conf(mddev);
+	multipath_conf_t *conf = mddev->private;
 	int i;
 
 	rcu_read_lock();
@@ -138,7 +138,7 @@ static void multipath_unplug(struct request_queue *q)
 static int multipath_make_request (struct request_queue *q, struct bio * bio)
 {
 	mddev_t *mddev = q->queuedata;
-	multipath_conf_t *conf = mddev_to_conf(mddev);
+	multipath_conf_t *conf = mddev->private;
 	struct multipath_bh * mp_bh;
 	struct multipath_info *multipath;
 	const int rw = bio_data_dir(bio);
@@ -180,7 +180,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
 
 static void multipath_status (struct seq_file *seq, mddev_t *mddev)
 {
-	multipath_conf_t *conf = mddev_to_conf(mddev);
+	multipath_conf_t *conf = mddev->private;
 	int i;
 
 	seq_printf (seq, " [%d/%d] [", conf->raid_disks,
@@ -195,7 +195,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev)
 static int multipath_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
-	multipath_conf_t *conf = mddev_to_conf(mddev);
+	multipath_conf_t *conf = mddev->private;
 	int i, ret = 0;
 
 	rcu_read_lock();
@@ -220,7 +220,7 @@ static int multipath_congested(void *data, int bits)
```
| 220 | */ | 220 | */ |
| 221 | static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) | 221 | static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) |
| 222 | { | 222 | { |
| 223 | multipath_conf_t *conf = mddev_to_conf(mddev); | 223 | multipath_conf_t *conf = mddev->private; |
| 224 | 224 | ||
| 225 | if (conf->working_disks <= 1) { | 225 | if (conf->working_disks <= 1) { |
| 226 | /* | 226 | /* |
| @@ -367,7 +367,7 @@ static void multipathd (mddev_t *mddev) | |||
| 367 | struct multipath_bh *mp_bh; | 367 | struct multipath_bh *mp_bh; |
| 368 | struct bio *bio; | 368 | struct bio *bio; |
| 369 | unsigned long flags; | 369 | unsigned long flags; |
| 370 | multipath_conf_t *conf = mddev_to_conf(mddev); | 370 | multipath_conf_t *conf = mddev->private; |
| 371 | struct list_head *head = &conf->retry_list; | 371 | struct list_head *head = &conf->retry_list; |
| 372 | 372 | ||
| 373 | md_check_recovery(mddev); | 373 | md_check_recovery(mddev); |
| @@ -421,6 +421,9 @@ static int multipath_run (mddev_t *mddev) | |||
| 421 | struct multipath_info *disk; | 421 | struct multipath_info *disk; |
| 422 | mdk_rdev_t *rdev; | 422 | mdk_rdev_t *rdev; |
| 423 | 423 | ||
| 424 | if (md_check_no_bitmap(mddev)) | ||
| 425 | return -EINVAL; | ||
| 426 | |||
| 424 | if (mddev->level != LEVEL_MULTIPATH) { | 427 | if (mddev->level != LEVEL_MULTIPATH) { |
| 425 | printk("multipath: %s: raid level not set to multipath IO (%d)\n", | 428 | printk("multipath: %s: raid level not set to multipath IO (%d)\n", |
| 426 | mdname(mddev), mddev->level); | 429 | mdname(mddev), mddev->level); |
| @@ -531,7 +534,7 @@ out: | |||
| 531 | 534 | ||
| 532 | static int multipath_stop (mddev_t *mddev) | 535 | static int multipath_stop (mddev_t *mddev) |
| 533 | { | 536 | { |
| 534 | multipath_conf_t *conf = mddev_to_conf(mddev); | 537 | multipath_conf_t *conf = mddev->private; |
| 535 | 538 | ||
| 536 | md_unregister_thread(mddev->thread); | 539 | md_unregister_thread(mddev->thread); |
| 537 | mddev->thread = NULL; | 540 | mddev->thread = NULL; |
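
`multipath_run()` now opens with `md_check_no_bitmap()`, the helper declared in the md.h hunk above, so the no-bitmap check lives in the personality rather than in common code. A rough userspace model of what such a guard does; the stand-in types and the message wording are assumptions, not the kernel's:

```c
#include <stdio.h>

/* Stand-in types -- illustration only, not the kernel's. */
struct bitmap;
struct mddev {
	const char *name;
	struct bitmap *bitmap;	/* non-NULL if a bitmap is configured */
};

/* Model of the guard: levels such as multipath and raid0 cannot use
 * a write-intent bitmap, so they refuse to assemble with one. */
static int check_no_bitmap(const struct mddev *mddev)
{
	if (!mddev->bitmap)
		return 0;
	fprintf(stderr, "%s: bitmaps are not supported at this level\n",
		mddev->name);
	return 1;	/* the caller turns this into -EINVAL */
}

int main(void)
{
	struct mddev md = { "md0", NULL };
	printf("check -> %d\n", check_no_bitmap(&md));	/* 0: ok to run */
	return 0;
}
```
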
diff --git a/drivers/md/multipath.h b/drivers/md/multipath.h index 6fa70b400cda..d1c2a8d78395 100644 --- a/drivers/md/multipath.h +++ b/drivers/md/multipath.h | |||
| @@ -19,12 +19,6 @@ struct multipath_private_data { | |||
| 19 | typedef struct multipath_private_data multipath_conf_t; | 19 | typedef struct multipath_private_data multipath_conf_t; |
| 20 | 20 | ||
| 21 | /* | 21 | /* |
| 22 | * this is the only point in the RAID code where we violate | ||
| 23 | * C type safety. mddev->private is an 'opaque' pointer. | ||
| 24 | */ | ||
| 25 | #define mddev_to_conf(mddev) ((multipath_conf_t *) mddev->private) | ||
| 26 | |||
| 27 | /* | ||
| 28 | * this is our 'private' 'collective' MULTIPATH buffer head. | 22 | * this is our 'private' 'collective' MULTIPATH buffer head. |
| 29 | * it contains information about what kind of IO operations were started | 23 | * it contains information about what kind of IO operations were started |
| 30 | * for this MULTIPATH operation, and about their status: | 24 | * for this MULTIPATH operation, and about their status: |
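
The deleted `mddev_to_conf()` macro (and its twins removed from raid0.h, raid1.h and raid10.h below) was only a cast wrapper around the opaque `mddev->private` pointer; since that member is a `void *`, plain assignment already converts it. A toy model with stand-in types:

```c
#include <stdio.h>

/* Stand-in types for illustration. */
typedef struct { int working_disks; } multipath_conf_t;
typedef struct { void *private; } mddev_t;

int main(void)
{
	multipath_conf_t conf = { .working_disks = 2 };
	mddev_t mddev = { .private = &conf };

	/* Old: conf = mddev_to_conf(mddev)  -- a cast hidden in a macro.
	 * New: the void * converts implicitly; no macro, no cast needed. */
	multipath_conf_t *c = mddev.private;

	printf("working_disks = %d\n", c->working_disks);
	return 0;
}
```
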
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 925507e7d673..ab4a489d8695 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
| @@ -26,8 +26,8 @@ | |||
| 26 | static void raid0_unplug(struct request_queue *q) | 26 | static void raid0_unplug(struct request_queue *q) |
| 27 | { | 27 | { |
| 28 | mddev_t *mddev = q->queuedata; | 28 | mddev_t *mddev = q->queuedata; |
| 29 | raid0_conf_t *conf = mddev_to_conf(mddev); | 29 | raid0_conf_t *conf = mddev->private; |
| 30 | mdk_rdev_t **devlist = conf->strip_zone[0].dev; | 30 | mdk_rdev_t **devlist = conf->devlist; |
| 31 | int i; | 31 | int i; |
| 32 | 32 | ||
| 33 | for (i=0; i<mddev->raid_disks; i++) { | 33 | for (i=0; i<mddev->raid_disks; i++) { |
| @@ -40,8 +40,8 @@ static void raid0_unplug(struct request_queue *q) | |||
| 40 | static int raid0_congested(void *data, int bits) | 40 | static int raid0_congested(void *data, int bits) |
| 41 | { | 41 | { |
| 42 | mddev_t *mddev = data; | 42 | mddev_t *mddev = data; |
| 43 | raid0_conf_t *conf = mddev_to_conf(mddev); | 43 | raid0_conf_t *conf = mddev->private; |
| 44 | mdk_rdev_t **devlist = conf->strip_zone[0].dev; | 44 | mdk_rdev_t **devlist = conf->devlist; |
| 45 | int i, ret = 0; | 45 | int i, ret = 0; |
| 46 | 46 | ||
| 47 | for (i = 0; i < mddev->raid_disks && !ret ; i++) { | 47 | for (i = 0; i < mddev->raid_disks && !ret ; i++) { |
| @@ -52,27 +52,60 @@ static int raid0_congested(void *data, int bits) | |||
| 52 | return ret; | 52 | return ret; |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | /* | ||
| 56 | * inform the user of the raid configuration | ||
| 57 | */ | ||
| 58 | static void dump_zones(mddev_t *mddev) | ||
| 59 | { | ||
| 60 | int j, k, h; | ||
| 61 | sector_t zone_size = 0; | ||
| 62 | sector_t zone_start = 0; | ||
| 63 | char b[BDEVNAME_SIZE]; | ||
| 64 | raid0_conf_t *conf = mddev->private; | ||
| 65 | printk(KERN_INFO "******* %s configuration *********\n", | ||
| 66 | mdname(mddev)); | ||
| 67 | h = 0; | ||
| 68 | for (j = 0; j < conf->nr_strip_zones; j++) { | ||
| 69 | printk(KERN_INFO "zone%d=[", j); | ||
| 70 | for (k = 0; k < conf->strip_zone[j].nb_dev; k++) | ||
| 71 | printk("%s/", | ||
| 72 | bdevname(conf->devlist[j*mddev->raid_disks | ||
| 73 | + k]->bdev, b)); | ||
| 74 | printk("]\n"); | ||
| 75 | |||
| 76 | zone_size = conf->strip_zone[j].zone_end - zone_start; | ||
| 77 | printk(KERN_INFO " zone offset=%llukb " | ||
| 78 | "device offset=%llukb size=%llukb\n", | ||
| 79 | (unsigned long long)zone_start>>1, | ||
| 80 | (unsigned long long)conf->strip_zone[j].dev_start>>1, | ||
| 81 | (unsigned long long)zone_size>>1); | ||
| 82 | zone_start = conf->strip_zone[j].zone_end; | ||
| 83 | } | ||
| 84 | printk(KERN_INFO "**********************************\n\n"); | ||
| 85 | } | ||
| 55 | 86 | ||
| 56 | static int create_strip_zones (mddev_t *mddev) | 87 | static int create_strip_zones(mddev_t *mddev) |
| 57 | { | 88 | { |
| 58 | int i, c, j; | 89 | int i, c, j, err; |
| 59 | sector_t current_start, curr_zone_start; | 90 | sector_t curr_zone_end, sectors; |
| 60 | sector_t min_spacing; | 91 | mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev; |
| 61 | raid0_conf_t *conf = mddev_to_conf(mddev); | ||
| 62 | mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; | ||
| 63 | struct strip_zone *zone; | 92 | struct strip_zone *zone; |
| 64 | int cnt; | 93 | int cnt; |
| 65 | char b[BDEVNAME_SIZE]; | 94 | char b[BDEVNAME_SIZE]; |
| 66 | 95 | raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL); | |
| 67 | /* | 96 | |
| 68 | * The number of 'same size groups' | 97 | if (!conf) |
| 69 | */ | 98 | return -ENOMEM; |
| 70 | conf->nr_strip_zones = 0; | ||
| 71 | |||
| 72 | list_for_each_entry(rdev1, &mddev->disks, same_set) { | 99 | list_for_each_entry(rdev1, &mddev->disks, same_set) { |
| 73 | printk(KERN_INFO "raid0: looking at %s\n", | 100 | printk(KERN_INFO "raid0: looking at %s\n", |
| 74 | bdevname(rdev1->bdev,b)); | 101 | bdevname(rdev1->bdev,b)); |
| 75 | c = 0; | 102 | c = 0; |
| 103 | |||
| 104 | /* round size to chunk_size */ | ||
| 105 | sectors = rdev1->sectors; | ||
| 106 | sector_div(sectors, mddev->chunk_sectors); | ||
| 107 | rdev1->sectors = sectors * mddev->chunk_sectors; | ||
| 108 | |||
| 76 | list_for_each_entry(rdev2, &mddev->disks, same_set) { | 109 | list_for_each_entry(rdev2, &mddev->disks, same_set) { |
| 77 | printk(KERN_INFO "raid0: comparing %s(%llu)", | 110 | printk(KERN_INFO "raid0: comparing %s(%llu)", |
| 78 | bdevname(rdev1->bdev,b), | 111 | bdevname(rdev1->bdev,b), |
| @@ -103,16 +136,16 @@ static int create_strip_zones (mddev_t *mddev) | |||
| 103 | } | 136 | } |
| 104 | } | 137 | } |
| 105 | printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones); | 138 | printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones); |
| 106 | 139 | err = -ENOMEM; | |
| 107 | conf->strip_zone = kzalloc(sizeof(struct strip_zone)* | 140 | conf->strip_zone = kzalloc(sizeof(struct strip_zone)* |
| 108 | conf->nr_strip_zones, GFP_KERNEL); | 141 | conf->nr_strip_zones, GFP_KERNEL); |
| 109 | if (!conf->strip_zone) | 142 | if (!conf->strip_zone) |
| 110 | return 1; | 143 | goto abort; |
| 111 | conf->devlist = kzalloc(sizeof(mdk_rdev_t*)* | 144 | conf->devlist = kzalloc(sizeof(mdk_rdev_t*)* |
| 112 | conf->nr_strip_zones*mddev->raid_disks, | 145 | conf->nr_strip_zones*mddev->raid_disks, |
| 113 | GFP_KERNEL); | 146 | GFP_KERNEL); |
| 114 | if (!conf->devlist) | 147 | if (!conf->devlist) |
| 115 | return 1; | 148 | goto abort; |
| 116 | 149 | ||
| 117 | /* The first zone must contain all devices, so here we check that | 150 | /* The first zone must contain all devices, so here we check that |
| 118 | * there is a proper alignment of slots to devices and find them all | 151 | * there is a proper alignment of slots to devices and find them all |
| @@ -120,7 +153,8 @@ static int create_strip_zones (mddev_t *mddev) | |||
| 120 | zone = &conf->strip_zone[0]; | 153 | zone = &conf->strip_zone[0]; |
| 121 | cnt = 0; | 154 | cnt = 0; |
| 122 | smallest = NULL; | 155 | smallest = NULL; |
| 123 | zone->dev = conf->devlist; | 156 | dev = conf->devlist; |
| 157 | err = -EINVAL; | ||
| 124 | list_for_each_entry(rdev1, &mddev->disks, same_set) { | 158 | list_for_each_entry(rdev1, &mddev->disks, same_set) { |
| 125 | int j = rdev1->raid_disk; | 159 | int j = rdev1->raid_disk; |
| 126 | 160 | ||
| @@ -129,12 +163,12 @@ static int create_strip_zones (mddev_t *mddev) | |||
| 129 | "aborting!\n", j); | 163 | "aborting!\n", j); |
| 130 | goto abort; | 164 | goto abort; |
| 131 | } | 165 | } |
| 132 | if (zone->dev[j]) { | 166 | if (dev[j]) { |
| 133 | printk(KERN_ERR "raid0: multiple devices for %d - " | 167 | printk(KERN_ERR "raid0: multiple devices for %d - " |
| 134 | "aborting!\n", j); | 168 | "aborting!\n", j); |
| 135 | goto abort; | 169 | goto abort; |
| 136 | } | 170 | } |
| 137 | zone->dev[j] = rdev1; | 171 | dev[j] = rdev1; |
| 138 | 172 | ||
| 139 | blk_queue_stack_limits(mddev->queue, | 173 | blk_queue_stack_limits(mddev->queue, |
| 140 | rdev1->bdev->bd_disk->queue); | 174 | rdev1->bdev->bd_disk->queue); |
| @@ -157,34 +191,32 @@ static int create_strip_zones (mddev_t *mddev) | |||
| 157 | goto abort; | 191 | goto abort; |
| 158 | } | 192 | } |
| 159 | zone->nb_dev = cnt; | 193 | zone->nb_dev = cnt; |
| 160 | zone->sectors = smallest->sectors * cnt; | 194 | zone->zone_end = smallest->sectors * cnt; |
| 161 | zone->zone_start = 0; | ||
| 162 | 195 | ||
| 163 | current_start = smallest->sectors; | 196 | curr_zone_end = zone->zone_end; |
| 164 | curr_zone_start = zone->sectors; | ||
| 165 | 197 | ||
| 166 | /* now do the other zones */ | 198 | /* now do the other zones */ |
| 167 | for (i = 1; i < conf->nr_strip_zones; i++) | 199 | for (i = 1; i < conf->nr_strip_zones; i++) |
| 168 | { | 200 | { |
| 169 | zone = conf->strip_zone + i; | 201 | zone = conf->strip_zone + i; |
| 170 | zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; | 202 | dev = conf->devlist + i * mddev->raid_disks; |
| 171 | 203 | ||
| 172 | printk(KERN_INFO "raid0: zone %d\n", i); | 204 | printk(KERN_INFO "raid0: zone %d\n", i); |
| 173 | zone->dev_start = current_start; | 205 | zone->dev_start = smallest->sectors; |
| 174 | smallest = NULL; | 206 | smallest = NULL; |
| 175 | c = 0; | 207 | c = 0; |
| 176 | 208 | ||
| 177 | for (j=0; j<cnt; j++) { | 209 | for (j=0; j<cnt; j++) { |
| 178 | char b[BDEVNAME_SIZE]; | 210 | char b[BDEVNAME_SIZE]; |
| 179 | rdev = conf->strip_zone[0].dev[j]; | 211 | rdev = conf->devlist[j]; |
| 180 | printk(KERN_INFO "raid0: checking %s ...", | 212 | printk(KERN_INFO "raid0: checking %s ...", |
| 181 | bdevname(rdev->bdev, b)); | 213 | bdevname(rdev->bdev, b)); |
| 182 | if (rdev->sectors <= current_start) { | 214 | if (rdev->sectors <= zone->dev_start) { |
| 183 | printk(KERN_INFO " nope.\n"); | 215 | printk(KERN_INFO " nope.\n"); |
| 184 | continue; | 216 | continue; |
| 185 | } | 217 | } |
| 186 | printk(KERN_INFO " contained as device %d\n", c); | 218 | printk(KERN_INFO " contained as device %d\n", c); |
| 187 | zone->dev[c] = rdev; | 219 | dev[c] = rdev; |
| 188 | c++; | 220 | c++; |
| 189 | if (!smallest || rdev->sectors < smallest->sectors) { | 221 | if (!smallest || rdev->sectors < smallest->sectors) { |
| 190 | smallest = rdev; | 222 | smallest = rdev; |
| @@ -194,47 +226,39 @@ static int create_strip_zones (mddev_t *mddev) | |||
| 194 | } | 226 | } |
| 195 | 227 | ||
| 196 | zone->nb_dev = c; | 228 | zone->nb_dev = c; |
| 197 | zone->sectors = (smallest->sectors - current_start) * c; | 229 | sectors = (smallest->sectors - zone->dev_start) * c; |
| 198 | printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", | 230 | printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", |
| 199 | zone->nb_dev, (unsigned long long)zone->sectors); | 231 | zone->nb_dev, (unsigned long long)sectors); |
| 200 | 232 | ||
| 201 | zone->zone_start = curr_zone_start; | 233 | curr_zone_end += sectors; |
| 202 | curr_zone_start += zone->sectors; | 234 | zone->zone_end = curr_zone_end; |
| 203 | 235 | ||
| 204 | current_start = smallest->sectors; | ||
| 205 | printk(KERN_INFO "raid0: current zone start: %llu\n", | 236 | printk(KERN_INFO "raid0: current zone start: %llu\n", |
| 206 | (unsigned long long)current_start); | 237 | (unsigned long long)smallest->sectors); |
| 207 | } | ||
| 208 | |||
| 209 | /* Now find appropriate hash spacing. | ||
| 210 | * We want a number which causes most hash entries to cover | ||
| 211 | * at most two strips, but the hash table must be at most | ||
| 212 | * 1 PAGE. We choose the smallest strip, or contiguous collection | ||
| 213 | * of strips, that has big enough size. We never consider the last | ||
| 214 | * strip though as it's size has no bearing on the efficacy of the hash | ||
| 215 | * table. | ||
| 216 | */ | ||
| 217 | conf->spacing = curr_zone_start; | ||
| 218 | min_spacing = curr_zone_start; | ||
| 219 | sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); | ||
| 220 | for (i=0; i < conf->nr_strip_zones-1; i++) { | ||
| 221 | sector_t s = 0; | ||
| 222 | for (j = i; j < conf->nr_strip_zones - 1 && | ||
| 223 | s < min_spacing; j++) | ||
| 224 | s += conf->strip_zone[j].sectors; | ||
| 225 | if (s >= min_spacing && s < conf->spacing) | ||
| 226 | conf->spacing = s; | ||
| 227 | } | 238 | } |
| 228 | |||
| 229 | mddev->queue->unplug_fn = raid0_unplug; | 239 | mddev->queue->unplug_fn = raid0_unplug; |
| 230 | |||
| 231 | mddev->queue->backing_dev_info.congested_fn = raid0_congested; | 240 | mddev->queue->backing_dev_info.congested_fn = raid0_congested; |
| 232 | mddev->queue->backing_dev_info.congested_data = mddev; | 241 | mddev->queue->backing_dev_info.congested_data = mddev; |
| 233 | 242 | ||
| 243 | /* | ||
| 244 | * now since we have the hard sector sizes, we can make sure | ||
| 245 | * chunk size is a multiple of that sector size | ||
| 246 | */ | ||
| 247 | if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) { | ||
| 248 | printk(KERN_ERR "%s chunk_size of %d not valid\n", | ||
| 249 | mdname(mddev), | ||
| 250 | mddev->chunk_sectors << 9); | ||
| 251 | goto abort; | ||
| 252 | } | ||
| 234 | printk(KERN_INFO "raid0: done.\n"); | 253 | printk(KERN_INFO "raid0: done.\n"); |
| 254 | mddev->private = conf; | ||
| 235 | return 0; | 255 | return 0; |
| 236 | abort: | 256 | abort: |
| 237 | return 1; | 257 | kfree(conf->strip_zone); |
| 258 | kfree(conf->devlist); | ||
| 259 | kfree(conf); | ||
| 260 | mddev->private = NULL; | ||
| 261 | return err; | ||
| 238 | } | 262 | } |
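
`create_strip_zones()` now allocates `conf` itself, returns a real errno with full cleanup on the abort path, and rounds every member device down to a whole number of chunks so zone sizes stay chunk-aligned. The rounding leans on `sector_div()`, which divides a `sector_t` in place and returns the remainder; a userspace model of both steps, with made-up sizes:

```c
#include <stdint.h>
#include <stdio.h>

/* Model of the kernel's sector_div(): divide *sector in place,
 * return the remainder. (The real one is a macro tuned for 32-bit.) */
static uint32_t sector_div_model(uint64_t *sector, uint32_t div)
{
	uint32_t rem = (uint32_t)(*sector % div);
	*sector /= div;
	return rem;
}

int main(void)
{
	uint64_t dev_sectors = 1000;	/* member device size */
	uint32_t chunk = 48;		/* 24KiB chunk -- not a power of 2 */

	/* Same steps as the hunk above: quotient * chunk = rounded size. */
	uint64_t q = dev_sectors;
	sector_div_model(&q, chunk);
	dev_sectors = q * chunk;

	printf("usable size: %llu sectors (40-sector tail dropped)\n",
	       (unsigned long long)dev_sectors);	/* 960 */
	return 0;
}
```
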
| 239 | 263 | ||
| 240 | /** | 264 | /** |
| @@ -252,10 +276,15 @@ static int raid0_mergeable_bvec(struct request_queue *q, | |||
| 252 | mddev_t *mddev = q->queuedata; | 276 | mddev_t *mddev = q->queuedata; |
| 253 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 277 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
| 254 | int max; | 278 | int max; |
| 255 | unsigned int chunk_sectors = mddev->chunk_size >> 9; | 279 | unsigned int chunk_sectors = mddev->chunk_sectors; |
| 256 | unsigned int bio_sectors = bvm->bi_size >> 9; | 280 | unsigned int bio_sectors = bvm->bi_size >> 9; |
| 257 | 281 | ||
| 258 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; | 282 | if (is_power_of_2(chunk_sectors)) |
| 283 | max = (chunk_sectors - ((sector & (chunk_sectors-1)) | ||
| 284 | + bio_sectors)) << 9; | ||
| 285 | else | ||
| 286 | max = (chunk_sectors - (sector_div(sector, chunk_sectors) | ||
| 287 | + bio_sectors)) << 9; | ||
| 259 | if (max < 0) max = 0; /* bio_add cannot handle a negative return */ | 288 | if (max < 0) max = 0; /* bio_add cannot handle a negative return */ |
| 260 | if (max <= biovec->bv_len && bio_sectors == 0) | 289 | if (max <= biovec->bv_len && bio_sectors == 0) |
| 261 | return biovec->bv_len; | 290 | return biovec->bv_len; |
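
Because chunks may no longer be powers of 2, `raid0_mergeable_bvec()` splits into two flows: a mask when `chunk_sectors` is a power of 2, and a `sector_div()` modulo otherwise. Both branches compute the room left in the current chunk; a small sketch:

```c
#include <stdint.h>
#include <stdio.h>

static int is_power_of_2(uint32_t n) { return n && !(n & (n - 1)); }

/* Sectors remaining between 'sector' and the end of its chunk,
 * mirroring the two branches in raid0_mergeable_bvec(). */
static uint32_t room_in_chunk(uint64_t sector, uint32_t chunk_sects)
{
	uint32_t in_chunk = is_power_of_2(chunk_sects)
		? (uint32_t)(sector & (chunk_sects - 1))	/* cheap mask */
		: (uint32_t)(sector % chunk_sects);		/* general modulo */
	return chunk_sects - in_chunk;
}

int main(void)
{
	printf("%u\n", room_in_chunk(100, 64));	/* 64 - 36 = 28 */
	printf("%u\n", room_in_chunk(100, 48));	/* 48 - 4  = 44 */
	return 0;
}
```
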
| @@ -277,84 +306,28 @@ static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
| 277 | return array_sectors; | 306 | return array_sectors; |
| 278 | } | 307 | } |
| 279 | 308 | ||
| 280 | static int raid0_run (mddev_t *mddev) | 309 | static int raid0_run(mddev_t *mddev) |
| 281 | { | 310 | { |
| 282 | unsigned cur=0, i=0, nb_zone; | 311 | int ret; |
| 283 | s64 sectors; | ||
| 284 | raid0_conf_t *conf; | ||
| 285 | 312 | ||
| 286 | if (mddev->chunk_size == 0) { | 313 | if (mddev->chunk_sectors == 0) { |
| 287 | printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); | 314 | printk(KERN_ERR "md/raid0: chunk size must be set.\n"); |
| 288 | return -EINVAL; | 315 | return -EINVAL; |
| 289 | } | 316 | } |
| 290 | printk(KERN_INFO "%s: setting max_sectors to %d, segment boundary to %d\n", | 317 | if (md_check_no_bitmap(mddev)) |
| 291 | mdname(mddev), | 318 | return -EINVAL; |
| 292 | mddev->chunk_size >> 9, | 319 | blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors); |
| 293 | (mddev->chunk_size>>1)-1); | ||
| 294 | blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); | ||
| 295 | blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1); | ||
| 296 | mddev->queue->queue_lock = &mddev->queue->__queue_lock; | 320 | mddev->queue->queue_lock = &mddev->queue->__queue_lock; |
| 297 | 321 | ||
| 298 | conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL); | 322 | ret = create_strip_zones(mddev); |
| 299 | if (!conf) | 323 | if (ret < 0) |
| 300 | goto out; | 324 | return ret; |
| 301 | mddev->private = (void *)conf; | ||
| 302 | |||
| 303 | conf->strip_zone = NULL; | ||
| 304 | conf->devlist = NULL; | ||
| 305 | if (create_strip_zones (mddev)) | ||
| 306 | goto out_free_conf; | ||
| 307 | 325 | ||
| 308 | /* calculate array device size */ | 326 | /* calculate array device size */ |
| 309 | md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); | 327 | md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); |
| 310 | 328 | ||
| 311 | printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", | 329 | printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", |
| 312 | (unsigned long long)mddev->array_sectors); | 330 | (unsigned long long)mddev->array_sectors); |
| 313 | printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n", | ||
| 314 | (unsigned long long)conf->spacing); | ||
| 315 | { | ||
| 316 | sector_t s = raid0_size(mddev, 0, 0); | ||
| 317 | sector_t space = conf->spacing; | ||
| 318 | int round; | ||
| 319 | conf->sector_shift = 0; | ||
| 320 | if (sizeof(sector_t) > sizeof(u32)) { | ||
| 321 | /*shift down space and s so that sector_div will work */ | ||
| 322 | while (space > (sector_t) (~(u32)0)) { | ||
| 323 | s >>= 1; | ||
| 324 | space >>= 1; | ||
| 325 | s += 1; /* force round-up */ | ||
| 326 | conf->sector_shift++; | ||
| 327 | } | ||
| 328 | } | ||
| 329 | round = sector_div(s, (u32)space) ? 1 : 0; | ||
| 330 | nb_zone = s + round; | ||
| 331 | } | ||
| 332 | printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone); | ||
| 333 | |||
| 334 | printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n", | ||
| 335 | nb_zone*sizeof(struct strip_zone*)); | ||
| 336 | conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL); | ||
| 337 | if (!conf->hash_table) | ||
| 338 | goto out_free_conf; | ||
| 339 | sectors = conf->strip_zone[cur].sectors; | ||
| 340 | |||
| 341 | conf->hash_table[0] = conf->strip_zone + cur; | ||
| 342 | for (i=1; i< nb_zone; i++) { | ||
| 343 | while (sectors <= conf->spacing) { | ||
| 344 | cur++; | ||
| 345 | sectors += conf->strip_zone[cur].sectors; | ||
| 346 | } | ||
| 347 | sectors -= conf->spacing; | ||
| 348 | conf->hash_table[i] = conf->strip_zone + cur; | ||
| 349 | } | ||
| 350 | if (conf->sector_shift) { | ||
| 351 | conf->spacing >>= conf->sector_shift; | ||
| 352 | /* round spacing up so when we divide by it, we | ||
| 353 | * err on the side of too-low, which is safest | ||
| 354 | */ | ||
| 355 | conf->spacing++; | ||
| 356 | } | ||
| 357 | |||
| 358 | /* calculate the max read-ahead size. | 331 | /* calculate the max read-ahead size. |
| 359 | * For read-ahead of large files to be effective, we need to | 332 | * For read-ahead of large files to be effective, we need to |
| 360 | * readahead at least twice a whole stripe. i.e. number of devices | 333 | * readahead at least twice a whole stripe. i.e. number of devices |
| @@ -365,48 +338,107 @@ static int raid0_run (mddev_t *mddev) | |||
| 365 | * chunksize should be used in that case. | 338 | * chunksize should be used in that case. |
| 366 | */ | 339 | */ |
| 367 | { | 340 | { |
| 368 | int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_SIZE; | 341 | int stripe = mddev->raid_disks * |
| 342 | (mddev->chunk_sectors << 9) / PAGE_SIZE; | ||
| 369 | if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) | 343 | if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) |
| 370 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; | 344 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; |
| 371 | } | 345 | } |
| 372 | 346 | ||
| 373 | |||
| 374 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); | 347 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); |
| 348 | dump_zones(mddev); | ||
| 375 | return 0; | 349 | return 0; |
| 350 | } | ||
| 376 | 351 | ||
| 377 | out_free_conf: | 352 | static int raid0_stop(mddev_t *mddev) |
| 353 | { | ||
| 354 | raid0_conf_t *conf = mddev->private; | ||
| 355 | |||
| 356 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | ||
| 378 | kfree(conf->strip_zone); | 357 | kfree(conf->strip_zone); |
| 379 | kfree(conf->devlist); | 358 | kfree(conf->devlist); |
| 380 | kfree(conf); | 359 | kfree(conf); |
| 381 | mddev->private = NULL; | 360 | mddev->private = NULL; |
| 382 | out: | 361 | return 0; |
| 383 | return -ENOMEM; | ||
| 384 | } | 362 | } |
| 385 | 363 | ||
| 386 | static int raid0_stop (mddev_t *mddev) | 364 | /* Find the zone which holds a particular offset |
| 365 | * Update *sectorp to be an offset in that zone | ||
| 366 | */ | ||
| 367 | static struct strip_zone *find_zone(struct raid0_private_data *conf, | ||
| 368 | sector_t *sectorp) | ||
| 387 | { | 369 | { |
| 388 | raid0_conf_t *conf = mddev_to_conf(mddev); | 370 | int i; |
| 371 | struct strip_zone *z = conf->strip_zone; | ||
| 372 | sector_t sector = *sectorp; | ||
| 373 | |||
| 374 | for (i = 0; i < conf->nr_strip_zones; i++) | ||
| 375 | if (sector < z[i].zone_end) { | ||
| 376 | if (i) | ||
| 377 | *sectorp = sector - z[i-1].zone_end; | ||
| 378 | return z + i; | ||
| 379 | } | ||
| 380 | BUG(); | ||
| 381 | } | ||
| 389 | 382 | ||
| 390 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 383 | /* |
| 391 | kfree(conf->hash_table); | 384 | * remaps the bio to the target device. we separate two flows. |
| 392 | conf->hash_table = NULL; | 385 | * power of 2 flow and a general flow for the sake of performance |
| 393 | kfree(conf->strip_zone); | 386 | */ |
| 394 | conf->strip_zone = NULL; | 387 | static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, |
| 395 | kfree(conf); | 388 | sector_t sector, sector_t *sector_offset) |
| 396 | mddev->private = NULL; | 389 | { |
| 390 | unsigned int sect_in_chunk; | ||
| 391 | sector_t chunk; | ||
| 392 | raid0_conf_t *conf = mddev->private; | ||
| 393 | unsigned int chunk_sects = mddev->chunk_sectors; | ||
| 394 | |||
| 395 | if (is_power_of_2(chunk_sects)) { | ||
| 396 | int chunksect_bits = ffz(~chunk_sects); | ||
| 397 | /* find the sector offset inside the chunk */ | ||
| 398 | sect_in_chunk = sector & (chunk_sects - 1); | ||
| 399 | sector >>= chunksect_bits; | ||
| 400 | /* chunk in zone */ | ||
| 401 | chunk = *sector_offset; | ||
| 402 | /* quotient is the chunk in real device*/ | ||
| 403 | sector_div(chunk, zone->nb_dev << chunksect_bits); | ||
| 404 | } else { | ||
| 405 | sect_in_chunk = sector_div(sector, chunk_sects); | ||
| 406 | chunk = *sector_offset; | ||
| 407 | sector_div(chunk, chunk_sects * zone->nb_dev); | ||
| 408 | } | ||
| 409 | /* | ||
| 410 | * position the bio over the real device | ||
| 411 | * real sector = chunk in device + start of zone | ||
| 412 | * + the position in the chunk | ||
| 413 | */ | ||
| 414 | *sector_offset = (chunk * chunk_sects) + sect_in_chunk; | ||
| 415 | return conf->devlist[(zone - conf->strip_zone)*mddev->raid_disks | ||
| 416 | + sector_div(sector, zone->nb_dev)]; | ||
| 417 | } | ||
| 397 | 418 | ||
| 398 | return 0; | 419 | /* |
| 420 | * Is IO distributed over one or more chunks? | ||
| 421 | */ | ||
| 422 | static inline int is_io_in_chunk_boundary(mddev_t *mddev, | ||
| 423 | unsigned int chunk_sects, struct bio *bio) | ||
| 424 | { | ||
| 425 | if (likely(is_power_of_2(chunk_sects))) { | ||
| 426 | return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) | ||
| 427 | + (bio->bi_size >> 9)); | ||
| 428 | } else { | ||
| 429 | sector_t sector = bio->bi_sector; | ||
| 430 | return chunk_sects >= (sector_div(sector, chunk_sects) | ||
| 431 | + (bio->bi_size >> 9)); | ||
| 432 | } | ||
| 399 | } | 433 | } |
| 400 | 434 | ||
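
`find_zone()` and `map_sector()` replace the old hash-table lookup: a linear scan over the cumulative `zone_end` values locates the zone, then chunk arithmetic picks the member device and the sector on it. A self-contained walk-through of the general (non-power-of-2) flow with made-up geometry; the kernel BUG()s where this sketch would fall off the end of the zone list:

```c
#include <stdint.h>
#include <stdio.h>

struct zone { uint64_t zone_end, dev_start; uint32_t nb_dev; };

/* Map an array sector to (device index, device sector), following
 * find_zone() + map_sector() for the general chunk-size flow. */
static void map(const struct zone *z, int nzones, uint32_t chunk,
		uint64_t sector)
{
	int i;
	uint64_t zoff = sector;			/* offset inside the zone */
	for (i = 0; i < nzones; i++)
		if (sector < z[i].zone_end)
			break;
	if (i > 0)
		zoff = sector - z[i - 1].zone_end;

	uint32_t sect_in_chunk = (uint32_t)(sector % chunk);
	uint64_t chunk_on_dev = zoff / ((uint64_t)chunk * z[i].nb_dev);
	uint32_t dev = (uint32_t)((sector / chunk) % z[i].nb_dev);
	uint64_t dev_sector = z[i].dev_start
			      + chunk_on_dev * chunk + sect_in_chunk;

	printf("sector %llu -> zone %d, dev %u, dev sector %llu\n",
	       (unsigned long long)sector, i, dev,
	       (unsigned long long)dev_sector);
}

int main(void)
{
	/* One zone: two devices, 48-sector chunks, 2000 sectors total. */
	struct zone z[] = { { 2000, 0, 2 } };
	map(z, 1, 48, 100);	/* -> zone 0, dev 0, dev sector 52 */
	return 0;
}
```
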
| 401 | static int raid0_make_request (struct request_queue *q, struct bio *bio) | 435 | static int raid0_make_request(struct request_queue *q, struct bio *bio) |
| 402 | { | 436 | { |
| 403 | mddev_t *mddev = q->queuedata; | 437 | mddev_t *mddev = q->queuedata; |
| 404 | unsigned int sect_in_chunk, chunksect_bits, chunk_sects; | 438 | unsigned int chunk_sects; |
| 405 | raid0_conf_t *conf = mddev_to_conf(mddev); | 439 | sector_t sector_offset; |
| 406 | struct strip_zone *zone; | 440 | struct strip_zone *zone; |
| 407 | mdk_rdev_t *tmp_dev; | 441 | mdk_rdev_t *tmp_dev; |
| 408 | sector_t chunk; | ||
| 409 | sector_t sector, rsect; | ||
| 410 | const int rw = bio_data_dir(bio); | 442 | const int rw = bio_data_dir(bio); |
| 411 | int cpu; | 443 | int cpu; |
| 412 | 444 | ||
| @@ -421,11 +453,9 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) | |||
| 421 | bio_sectors(bio)); | 453 | bio_sectors(bio)); |
| 422 | part_stat_unlock(); | 454 | part_stat_unlock(); |
| 423 | 455 | ||
| 424 | chunk_sects = mddev->chunk_size >> 9; | 456 | chunk_sects = mddev->chunk_sectors; |
| 425 | chunksect_bits = ffz(~chunk_sects); | 457 | if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { |
| 426 | sector = bio->bi_sector; | 458 | sector_t sector = bio->bi_sector; |
| 427 | |||
| 428 | if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) { | ||
| 429 | struct bio_pair *bp; | 459 | struct bio_pair *bp; |
| 430 | /* Sanity check -- queue functions should prevent this happening */ | 460 | /* Sanity check -- queue functions should prevent this happening */ |
| 431 | if (bio->bi_vcnt != 1 || | 461 | if (bio->bi_vcnt != 1 || |
| @@ -434,7 +464,12 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) | |||
| 434 | /* This is a one page bio that upper layers | 464 | /* This is a one page bio that upper layers |
| 435 | * refuse to split for us, so we need to split it. | 465 | * refuse to split for us, so we need to split it. |
| 436 | */ | 466 | */ |
| 437 | bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); | 467 | if (likely(is_power_of_2(chunk_sects))) |
| 468 | bp = bio_split(bio, chunk_sects - (sector & | ||
| 469 | (chunk_sects-1))); | ||
| 470 | else | ||
| 471 | bp = bio_split(bio, chunk_sects - | ||
| 472 | sector_div(sector, chunk_sects)); | ||
| 438 | if (raid0_make_request(q, &bp->bio1)) | 473 | if (raid0_make_request(q, &bp->bio1)) |
| 439 | generic_make_request(&bp->bio1); | 474 | generic_make_request(&bp->bio1); |
| 440 | if (raid0_make_request(q, &bp->bio2)) | 475 | if (raid0_make_request(q, &bp->bio2)) |
| @@ -443,34 +478,14 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) | |||
| 443 | bio_pair_release(bp); | 478 | bio_pair_release(bp); |
| 444 | return 0; | 479 | return 0; |
| 445 | } | 480 | } |
| 446 | |||
| 447 | |||
| 448 | { | ||
| 449 | sector_t x = sector >> conf->sector_shift; | ||
| 450 | sector_div(x, (u32)conf->spacing); | ||
| 451 | zone = conf->hash_table[x]; | ||
| 452 | } | ||
| 453 | 481 | ||
| 454 | while (sector >= zone->zone_start + zone->sectors) | 482 | sector_offset = bio->bi_sector; |
| 455 | zone++; | 483 | zone = find_zone(mddev->private, §or_offset); |
| 456 | 484 | tmp_dev = map_sector(mddev, zone, bio->bi_sector, | |
| 457 | sect_in_chunk = bio->bi_sector & (chunk_sects - 1); | 485 | §or_offset); |
| 458 | |||
| 459 | |||
| 460 | { | ||
| 461 | sector_t x = (sector - zone->zone_start) >> chunksect_bits; | ||
| 462 | |||
| 463 | sector_div(x, zone->nb_dev); | ||
| 464 | chunk = x; | ||
| 465 | |||
| 466 | x = sector >> chunksect_bits; | ||
| 467 | tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; | ||
| 468 | } | ||
| 469 | rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk; | ||
| 470 | |||
| 471 | bio->bi_bdev = tmp_dev->bdev; | 486 | bio->bi_bdev = tmp_dev->bdev; |
| 472 | bio->bi_sector = rsect + tmp_dev->data_offset; | 487 | bio->bi_sector = sector_offset + zone->dev_start + |
| 473 | 488 | tmp_dev->data_offset; | |
| 474 | /* | 489 | /* |
| 475 | * Let the main block layer submit the IO and resolve recursion: | 490 | * Let the main block layer submit the IO and resolve recursion: |
| 476 | */ | 491 | */ |
| @@ -485,31 +500,35 @@ bad_map: | |||
| 485 | return 0; | 500 | return 0; |
| 486 | } | 501 | } |
| 487 | 502 | ||
| 488 | static void raid0_status (struct seq_file *seq, mddev_t *mddev) | 503 | static void raid0_status(struct seq_file *seq, mddev_t *mddev) |
| 489 | { | 504 | { |
| 490 | #undef MD_DEBUG | 505 | #undef MD_DEBUG |
| 491 | #ifdef MD_DEBUG | 506 | #ifdef MD_DEBUG |
| 492 | int j, k, h; | 507 | int j, k, h; |
| 493 | char b[BDEVNAME_SIZE]; | 508 | char b[BDEVNAME_SIZE]; |
| 494 | raid0_conf_t *conf = mddev_to_conf(mddev); | 509 | raid0_conf_t *conf = mddev->private; |
| 495 | 510 | ||
| 511 | sector_t zone_size; | ||
| 512 | sector_t zone_start = 0; | ||
| 496 | h = 0; | 513 | h = 0; |
| 514 | |||
| 497 | for (j = 0; j < conf->nr_strip_zones; j++) { | 515 | for (j = 0; j < conf->nr_strip_zones; j++) { |
| 498 | seq_printf(seq, " z%d", j); | 516 | seq_printf(seq, " z%d", j); |
| 499 | if (conf->hash_table[h] == conf->strip_zone+j) | ||
| 500 | seq_printf(seq, "(h%d)", h++); | ||
| 501 | seq_printf(seq, "=["); | 517 | seq_printf(seq, "=["); |
| 502 | for (k = 0; k < conf->strip_zone[j].nb_dev; k++) | 518 | for (k = 0; k < conf->strip_zone[j].nb_dev; k++) |
| 503 | seq_printf(seq, "%s/", bdevname( | 519 | seq_printf(seq, "%s/", bdevname( |
| 504 | conf->strip_zone[j].dev[k]->bdev,b)); | 520 | conf->devlist[j*mddev->raid_disks + k] |
| 505 | 521 | ->bdev, b)); | |
| 506 | seq_printf(seq, "] zs=%d ds=%d s=%d\n", | 522 | |
| 507 | conf->strip_zone[j].zone_start, | 523 | zone_size = conf->strip_zone[j].zone_end - zone_start; |
| 508 | conf->strip_zone[j].dev_start, | 524 | seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n", |
| 509 | conf->strip_zone[j].sectors); | 525 | (unsigned long long)zone_start>>1, |
| 526 | (unsigned long long)conf->strip_zone[j].dev_start>>1, | ||
| 527 | (unsigned long long)zone_size>>1); | ||
| 528 | zone_start = conf->strip_zone[j].zone_end; | ||
| 510 | } | 529 | } |
| 511 | #endif | 530 | #endif |
| 512 | seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); | 531 | seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2); |
| 513 | return; | 532 | return; |
| 514 | } | 533 | } |
| 515 | 534 | ||
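
One detail worth pulling out of the raid0.c changes above: `raid0_run()` still sizes read-ahead to cover at least two full stripes, only now computed from `chunk_sectors`. Plugging illustrative numbers into that formula:

```c
#include <stdio.h>

int main(void)
{
	/* Illustrative geometry: 4 disks, 64KiB (128-sector) chunks. */
	unsigned int raid_disks = 4, chunk_sectors = 128;
	unsigned int page_size = 4096;

	/* One stripe in pages, as in raid0_run():
	 * raid_disks * (chunk_sectors << 9) / PAGE_SIZE */
	unsigned int stripe = raid_disks * (chunk_sectors << 9) / page_size;

	printf("stripe = %u pages; ra_pages raised to >= %u (%u KiB)\n",
	       stripe, 2 * stripe, 2 * stripe * page_size / 1024);
	return 0;
}
```
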
diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 824b12eb1d4f..91f8e876ee64 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h | |||
| @@ -3,26 +3,18 @@ | |||
| 3 | 3 | ||
| 4 | struct strip_zone | 4 | struct strip_zone |
| 5 | { | 5 | { |
| 6 | sector_t zone_start; /* Zone offset in md_dev (in sectors) */ | 6 | sector_t zone_end; /* Start of the next zone (in sectors) */ |
| 7 | sector_t dev_start; /* Zone offset in real dev (in sectors) */ | 7 | sector_t dev_start; /* Zone offset in real dev (in sectors) */ |
| 8 | sector_t sectors; /* Zone size in sectors */ | ||
| 9 | int nb_dev; /* # of devices attached to the zone */ | 8 | int nb_dev; /* # of devices attached to the zone */ |
| 10 | mdk_rdev_t **dev; /* Devices attached to the zone */ | ||
| 11 | }; | 9 | }; |
| 12 | 10 | ||
| 13 | struct raid0_private_data | 11 | struct raid0_private_data |
| 14 | { | 12 | { |
| 15 | struct strip_zone **hash_table; /* Table of indexes into strip_zone */ | ||
| 16 | struct strip_zone *strip_zone; | 13 | struct strip_zone *strip_zone; |
| 17 | mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ | 14 | mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ |
| 18 | int nr_strip_zones; | 15 | int nr_strip_zones; |
| 19 | |||
| 20 | sector_t spacing; | ||
| 21 | int sector_shift; /* shift this before divide by spacing */ | ||
| 22 | }; | 16 | }; |
| 23 | 17 | ||
| 24 | typedef struct raid0_private_data raid0_conf_t; | 18 | typedef struct raid0_private_data raid0_conf_t; |
| 25 | 19 | ||
| 26 | #define mddev_to_conf(mddev) ((raid0_conf_t *) mddev->private) | ||
| 27 | |||
| 28 | #endif | 20 | #endif |
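
The slimmed `strip_zone` keeps just the cumulative `zone_end`; the removed `zone_start` and `sectors` fields are derivable on demand, which is exactly what `dump_zones()` and `find_zone()` above do. In sketch form, with made-up zone sizes:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Cumulative ends of three illustrative zones (in sectors). */
	uint64_t zone_end[] = { 600, 900, 1000 };

	for (int i = 0; i < 3; i++) {
		uint64_t zone_start = i ? zone_end[i - 1] : 0;	/* old field */
		uint64_t sectors = zone_end[i] - zone_start;	/* old field */
		printf("zone %d: start=%llu size=%llu\n", i,
		       (unsigned long long)zone_start,
		       (unsigned long long)sectors);
	}
	return 0;
}
```
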
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e23758b4a34e..89939a7aef57 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -182,7 +182,7 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio) | |||
| 182 | 182 | ||
| 183 | static void free_r1bio(r1bio_t *r1_bio) | 183 | static void free_r1bio(r1bio_t *r1_bio) |
| 184 | { | 184 | { |
| 185 | conf_t *conf = mddev_to_conf(r1_bio->mddev); | 185 | conf_t *conf = r1_bio->mddev->private; |
| 186 | 186 | ||
| 187 | /* | 187 | /* |
| 188 | * Wake up any possible resync thread that waits for the device | 188 | * Wake up any possible resync thread that waits for the device |
| @@ -196,7 +196,7 @@ static void free_r1bio(r1bio_t *r1_bio) | |||
| 196 | 196 | ||
| 197 | static void put_buf(r1bio_t *r1_bio) | 197 | static void put_buf(r1bio_t *r1_bio) |
| 198 | { | 198 | { |
| 199 | conf_t *conf = mddev_to_conf(r1_bio->mddev); | 199 | conf_t *conf = r1_bio->mddev->private; |
| 200 | int i; | 200 | int i; |
| 201 | 201 | ||
| 202 | for (i=0; i<conf->raid_disks; i++) { | 202 | for (i=0; i<conf->raid_disks; i++) { |
| @@ -214,7 +214,7 @@ static void reschedule_retry(r1bio_t *r1_bio) | |||
| 214 | { | 214 | { |
| 215 | unsigned long flags; | 215 | unsigned long flags; |
| 216 | mddev_t *mddev = r1_bio->mddev; | 216 | mddev_t *mddev = r1_bio->mddev; |
| 217 | conf_t *conf = mddev_to_conf(mddev); | 217 | conf_t *conf = mddev->private; |
| 218 | 218 | ||
| 219 | spin_lock_irqsave(&conf->device_lock, flags); | 219 | spin_lock_irqsave(&conf->device_lock, flags); |
| 220 | list_add(&r1_bio->retry_list, &conf->retry_list); | 220 | list_add(&r1_bio->retry_list, &conf->retry_list); |
| @@ -253,7 +253,7 @@ static void raid_end_bio_io(r1bio_t *r1_bio) | |||
| 253 | */ | 253 | */ |
| 254 | static inline void update_head_pos(int disk, r1bio_t *r1_bio) | 254 | static inline void update_head_pos(int disk, r1bio_t *r1_bio) |
| 255 | { | 255 | { |
| 256 | conf_t *conf = mddev_to_conf(r1_bio->mddev); | 256 | conf_t *conf = r1_bio->mddev->private; |
| 257 | 257 | ||
| 258 | conf->mirrors[disk].head_position = | 258 | conf->mirrors[disk].head_position = |
| 259 | r1_bio->sector + (r1_bio->sectors); | 259 | r1_bio->sector + (r1_bio->sectors); |
| @@ -264,7 +264,7 @@ static void raid1_end_read_request(struct bio *bio, int error) | |||
| 264 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 264 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
| 265 | r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); | 265 | r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); |
| 266 | int mirror; | 266 | int mirror; |
| 267 | conf_t *conf = mddev_to_conf(r1_bio->mddev); | 267 | conf_t *conf = r1_bio->mddev->private; |
| 268 | 268 | ||
| 269 | mirror = r1_bio->read_disk; | 269 | mirror = r1_bio->read_disk; |
| 270 | /* | 270 | /* |
| @@ -309,7 +309,7 @@ static void raid1_end_write_request(struct bio *bio, int error) | |||
| 309 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 309 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
| 310 | r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); | 310 | r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); |
| 311 | int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); | 311 | int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); |
| 312 | conf_t *conf = mddev_to_conf(r1_bio->mddev); | 312 | conf_t *conf = r1_bio->mddev->private; |
| 313 | struct bio *to_put = NULL; | 313 | struct bio *to_put = NULL; |
| 314 | 314 | ||
| 315 | 315 | ||
| @@ -541,7 +541,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
| 541 | 541 | ||
| 542 | static void unplug_slaves(mddev_t *mddev) | 542 | static void unplug_slaves(mddev_t *mddev) |
| 543 | { | 543 | { |
| 544 | conf_t *conf = mddev_to_conf(mddev); | 544 | conf_t *conf = mddev->private; |
| 545 | int i; | 545 | int i; |
| 546 | 546 | ||
| 547 | rcu_read_lock(); | 547 | rcu_read_lock(); |
| @@ -573,7 +573,7 @@ static void raid1_unplug(struct request_queue *q) | |||
| 573 | static int raid1_congested(void *data, int bits) | 573 | static int raid1_congested(void *data, int bits) |
| 574 | { | 574 | { |
| 575 | mddev_t *mddev = data; | 575 | mddev_t *mddev = data; |
| 576 | conf_t *conf = mddev_to_conf(mddev); | 576 | conf_t *conf = mddev->private; |
| 577 | int i, ret = 0; | 577 | int i, ret = 0; |
| 578 | 578 | ||
| 579 | rcu_read_lock(); | 579 | rcu_read_lock(); |
| @@ -772,7 +772,7 @@ do_sync_io: | |||
| 772 | static int make_request(struct request_queue *q, struct bio * bio) | 772 | static int make_request(struct request_queue *q, struct bio * bio) |
| 773 | { | 773 | { |
| 774 | mddev_t *mddev = q->queuedata; | 774 | mddev_t *mddev = q->queuedata; |
| 775 | conf_t *conf = mddev_to_conf(mddev); | 775 | conf_t *conf = mddev->private; |
| 776 | mirror_info_t *mirror; | 776 | mirror_info_t *mirror; |
| 777 | r1bio_t *r1_bio; | 777 | r1bio_t *r1_bio; |
| 778 | struct bio *read_bio; | 778 | struct bio *read_bio; |
| @@ -991,7 +991,7 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
| 991 | 991 | ||
| 992 | static void status(struct seq_file *seq, mddev_t *mddev) | 992 | static void status(struct seq_file *seq, mddev_t *mddev) |
| 993 | { | 993 | { |
| 994 | conf_t *conf = mddev_to_conf(mddev); | 994 | conf_t *conf = mddev->private; |
| 995 | int i; | 995 | int i; |
| 996 | 996 | ||
| 997 | seq_printf(seq, " [%d/%d] [", conf->raid_disks, | 997 | seq_printf(seq, " [%d/%d] [", conf->raid_disks, |
| @@ -1010,7 +1010,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) | |||
| 1010 | static void error(mddev_t *mddev, mdk_rdev_t *rdev) | 1010 | static void error(mddev_t *mddev, mdk_rdev_t *rdev) |
| 1011 | { | 1011 | { |
| 1012 | char b[BDEVNAME_SIZE]; | 1012 | char b[BDEVNAME_SIZE]; |
| 1013 | conf_t *conf = mddev_to_conf(mddev); | 1013 | conf_t *conf = mddev->private; |
| 1014 | 1014 | ||
| 1015 | /* | 1015 | /* |
| 1016 | * If it is not operational, then we have already marked it as dead | 1016 | * If it is not operational, then we have already marked it as dead |
| @@ -1214,7 +1214,7 @@ static void end_sync_write(struct bio *bio, int error) | |||
| 1214 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 1214 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
| 1215 | r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); | 1215 | r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); |
| 1216 | mddev_t *mddev = r1_bio->mddev; | 1216 | mddev_t *mddev = r1_bio->mddev; |
| 1217 | conf_t *conf = mddev_to_conf(mddev); | 1217 | conf_t *conf = mddev->private; |
| 1218 | int i; | 1218 | int i; |
| 1219 | int mirror=0; | 1219 | int mirror=0; |
| 1220 | 1220 | ||
| @@ -1248,7 +1248,7 @@ static void end_sync_write(struct bio *bio, int error) | |||
| 1248 | 1248 | ||
| 1249 | static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | 1249 | static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) |
| 1250 | { | 1250 | { |
| 1251 | conf_t *conf = mddev_to_conf(mddev); | 1251 | conf_t *conf = mddev->private; |
| 1252 | int i; | 1252 | int i; |
| 1253 | int disks = conf->raid_disks; | 1253 | int disks = conf->raid_disks; |
| 1254 | struct bio *bio, *wbio; | 1254 | struct bio *bio, *wbio; |
| @@ -1562,7 +1562,7 @@ static void raid1d(mddev_t *mddev) | |||
| 1562 | r1bio_t *r1_bio; | 1562 | r1bio_t *r1_bio; |
| 1563 | struct bio *bio; | 1563 | struct bio *bio; |
| 1564 | unsigned long flags; | 1564 | unsigned long flags; |
| 1565 | conf_t *conf = mddev_to_conf(mddev); | 1565 | conf_t *conf = mddev->private; |
| 1566 | struct list_head *head = &conf->retry_list; | 1566 | struct list_head *head = &conf->retry_list; |
| 1567 | int unplug=0; | 1567 | int unplug=0; |
| 1568 | mdk_rdev_t *rdev; | 1568 | mdk_rdev_t *rdev; |
| @@ -1585,7 +1585,7 @@ static void raid1d(mddev_t *mddev) | |||
| 1585 | spin_unlock_irqrestore(&conf->device_lock, flags); | 1585 | spin_unlock_irqrestore(&conf->device_lock, flags); |
| 1586 | 1586 | ||
| 1587 | mddev = r1_bio->mddev; | 1587 | mddev = r1_bio->mddev; |
| 1588 | conf = mddev_to_conf(mddev); | 1588 | conf = mddev->private; |
| 1589 | if (test_bit(R1BIO_IsSync, &r1_bio->state)) { | 1589 | if (test_bit(R1BIO_IsSync, &r1_bio->state)) { |
| 1590 | sync_request_write(mddev, r1_bio); | 1590 | sync_request_write(mddev, r1_bio); |
| 1591 | unplug = 1; | 1591 | unplug = 1; |
| @@ -1706,7 +1706,7 @@ static int init_resync(conf_t *conf) | |||
| 1706 | 1706 | ||
| 1707 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) | 1707 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) |
| 1708 | { | 1708 | { |
| 1709 | conf_t *conf = mddev_to_conf(mddev); | 1709 | conf_t *conf = mddev->private; |
| 1710 | r1bio_t *r1_bio; | 1710 | r1bio_t *r1_bio; |
| 1711 | struct bio *bio; | 1711 | struct bio *bio; |
| 1712 | sector_t max_sector, nr_sectors; | 1712 | sector_t max_sector, nr_sectors; |
| @@ -2052,6 +2052,10 @@ static int run(mddev_t *mddev) | |||
| 2052 | goto out_free_conf; | 2052 | goto out_free_conf; |
| 2053 | } | 2053 | } |
| 2054 | 2054 | ||
| 2055 | if (mddev->recovery_cp != MaxSector) | ||
| 2056 | printk(KERN_NOTICE "raid1: %s is not clean" | ||
| 2057 | " -- starting background reconstruction\n", | ||
| 2058 | mdname(mddev)); | ||
| 2055 | printk(KERN_INFO | 2059 | printk(KERN_INFO |
| 2056 | "raid1: raid set %s active with %d out of %d mirrors\n", | 2060 | "raid1: raid set %s active with %d out of %d mirrors\n", |
| 2057 | mdname(mddev), mddev->raid_disks - mddev->degraded, | 2061 | mdname(mddev), mddev->raid_disks - mddev->degraded, |
| @@ -2087,7 +2091,7 @@ out: | |||
| 2087 | 2091 | ||
| 2088 | static int stop(mddev_t *mddev) | 2092 | static int stop(mddev_t *mddev) |
| 2089 | { | 2093 | { |
| 2090 | conf_t *conf = mddev_to_conf(mddev); | 2094 | conf_t *conf = mddev->private; |
| 2091 | struct bitmap *bitmap = mddev->bitmap; | 2095 | struct bitmap *bitmap = mddev->bitmap; |
| 2092 | int behind_wait = 0; | 2096 | int behind_wait = 0; |
| 2093 | 2097 | ||
| @@ -2155,16 +2159,16 @@ static int raid1_reshape(mddev_t *mddev) | |||
| 2155 | mempool_t *newpool, *oldpool; | 2159 | mempool_t *newpool, *oldpool; |
| 2156 | struct pool_info *newpoolinfo; | 2160 | struct pool_info *newpoolinfo; |
| 2157 | mirror_info_t *newmirrors; | 2161 | mirror_info_t *newmirrors; |
| 2158 | conf_t *conf = mddev_to_conf(mddev); | 2162 | conf_t *conf = mddev->private; |
| 2159 | int cnt, raid_disks; | 2163 | int cnt, raid_disks; |
| 2160 | unsigned long flags; | 2164 | unsigned long flags; |
| 2161 | int d, d2, err; | 2165 | int d, d2, err; |
| 2162 | 2166 | ||
| 2163 | /* Cannot change chunk_size, layout, or level */ | 2167 | /* Cannot change chunk_size, layout, or level */ |
| 2164 | if (mddev->chunk_size != mddev->new_chunk || | 2168 | if (mddev->chunk_sectors != mddev->new_chunk_sectors || |
| 2165 | mddev->layout != mddev->new_layout || | 2169 | mddev->layout != mddev->new_layout || |
| 2166 | mddev->level != mddev->new_level) { | 2170 | mddev->level != mddev->new_level) { |
| 2167 | mddev->new_chunk = mddev->chunk_size; | 2171 | mddev->new_chunk_sectors = mddev->chunk_sectors; |
| 2168 | mddev->new_layout = mddev->layout; | 2172 | mddev->new_layout = mddev->layout; |
| 2169 | mddev->new_level = mddev->level; | 2173 | mddev->new_level = mddev->level; |
| 2170 | return -EINVAL; | 2174 | return -EINVAL; |
| @@ -2252,7 +2256,7 @@ static int raid1_reshape(mddev_t *mddev) | |||
| 2252 | 2256 | ||
| 2253 | static void raid1_quiesce(mddev_t *mddev, int state) | 2257 | static void raid1_quiesce(mddev_t *mddev, int state) |
| 2254 | { | 2258 | { |
| 2255 | conf_t *conf = mddev_to_conf(mddev); | 2259 | conf_t *conf = mddev->private; |
| 2256 | 2260 | ||
| 2257 | switch(state) { | 2261 | switch(state) { |
| 2258 | case 1: | 2262 | case 1: |
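
A pattern from the raid1.c hunks above: with the `reconfig` method gone, a requested change is staged in the `mddev->new_*` fields, and `raid1_reshape()` rejects everything except a disk-count change, rolling the staged values back so no half-applied request lingers. The shape of that guard, modeled in userspace:

```c
#include <stdio.h>

/* Stand-in for the handful of mddev fields involved. */
struct md {
	int chunk_sectors, new_chunk_sectors;
	int layout, new_layout;
	int level, new_level;
};

/* Model of the guard at the top of raid1_reshape(): only the disk
 * count may change; staged chunk/layout/level changes are rejected
 * and reverted to the current values. */
static int reshape_allowed(struct md *m)
{
	if (m->new_chunk_sectors != m->chunk_sectors ||
	    m->new_layout != m->layout ||
	    m->new_level != m->level) {
		m->new_chunk_sectors = m->chunk_sectors;
		m->new_layout = m->layout;
		m->new_level = m->level;
		return -1;	/* -EINVAL in the kernel */
	}
	return 0;
}

int main(void)
{
	struct md m = { 128, 256, 0, 0, 1, 1 };	/* staged chunk change */
	printf("allowed? %d\n", reshape_allowed(&m));	/* -1: rejected */
	return 0;
}
```
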
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 1620eea3d57c..e87b84deff68 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h | |||
| @@ -64,12 +64,6 @@ struct r1_private_data_s { | |||
| 64 | typedef struct r1_private_data_s conf_t; | 64 | typedef struct r1_private_data_s conf_t; |
| 65 | 65 | ||
| 66 | /* | 66 | /* |
| 67 | * this is the only point in the RAID code where we violate | ||
| 68 | * C type safety. mddev->private is an 'opaque' pointer. | ||
| 69 | */ | ||
| 70 | #define mddev_to_conf(mddev) ((conf_t *) mddev->private) | ||
| 71 | |||
| 72 | /* | ||
| 73 | * this is our 'private' RAID1 bio. | 67 | * this is our 'private' RAID1 bio. |
| 74 | * | 68 | * |
| 75 | * it contains information about what kind of IO operations were started | 69 | * it contains information about what kind of IO operations were started |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 750550c1166f..ae12ceafe10c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
| @@ -188,7 +188,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio) | |||
| 188 | 188 | ||
| 189 | static void free_r10bio(r10bio_t *r10_bio) | 189 | static void free_r10bio(r10bio_t *r10_bio) |
| 190 | { | 190 | { |
| 191 | conf_t *conf = mddev_to_conf(r10_bio->mddev); | 191 | conf_t *conf = r10_bio->mddev->private; |
| 192 | 192 | ||
| 193 | /* | 193 | /* |
| 194 | * Wake up any possible resync thread that waits for the device | 194 | * Wake up any possible resync thread that waits for the device |
| @@ -202,7 +202,7 @@ static void free_r10bio(r10bio_t *r10_bio) | |||
| 202 | 202 | ||
| 203 | static void put_buf(r10bio_t *r10_bio) | 203 | static void put_buf(r10bio_t *r10_bio) |
| 204 | { | 204 | { |
| 205 | conf_t *conf = mddev_to_conf(r10_bio->mddev); | 205 | conf_t *conf = r10_bio->mddev->private; |
| 206 | 206 | ||
| 207 | mempool_free(r10_bio, conf->r10buf_pool); | 207 | mempool_free(r10_bio, conf->r10buf_pool); |
| 208 | 208 | ||
| @@ -213,7 +213,7 @@ static void reschedule_retry(r10bio_t *r10_bio) | |||
| 213 | { | 213 | { |
| 214 | unsigned long flags; | 214 | unsigned long flags; |
| 215 | mddev_t *mddev = r10_bio->mddev; | 215 | mddev_t *mddev = r10_bio->mddev; |
| 216 | conf_t *conf = mddev_to_conf(mddev); | 216 | conf_t *conf = mddev->private; |
| 217 | 217 | ||
| 218 | spin_lock_irqsave(&conf->device_lock, flags); | 218 | spin_lock_irqsave(&conf->device_lock, flags); |
| 219 | list_add(&r10_bio->retry_list, &conf->retry_list); | 219 | list_add(&r10_bio->retry_list, &conf->retry_list); |
| @@ -245,7 +245,7 @@ static void raid_end_bio_io(r10bio_t *r10_bio) | |||
| 245 | */ | 245 | */ |
| 246 | static inline void update_head_pos(int slot, r10bio_t *r10_bio) | 246 | static inline void update_head_pos(int slot, r10bio_t *r10_bio) |
| 247 | { | 247 | { |
| 248 | conf_t *conf = mddev_to_conf(r10_bio->mddev); | 248 | conf_t *conf = r10_bio->mddev->private; |
| 249 | 249 | ||
| 250 | conf->mirrors[r10_bio->devs[slot].devnum].head_position = | 250 | conf->mirrors[r10_bio->devs[slot].devnum].head_position = |
| 251 | r10_bio->devs[slot].addr + (r10_bio->sectors); | 251 | r10_bio->devs[slot].addr + (r10_bio->sectors); |
| @@ -256,7 +256,7 @@ static void raid10_end_read_request(struct bio *bio, int error) | |||
| 256 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 256 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
| 257 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); | 257 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); |
| 258 | int slot, dev; | 258 | int slot, dev; |
| 259 | conf_t *conf = mddev_to_conf(r10_bio->mddev); | 259 | conf_t *conf = r10_bio->mddev->private; |
| 260 | 260 | ||
| 261 | 261 | ||
| 262 | slot = r10_bio->read_slot; | 262 | slot = r10_bio->read_slot; |
| @@ -297,7 +297,7 @@ static void raid10_end_write_request(struct bio *bio, int error) | |||
| 297 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 297 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
| 298 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); | 298 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); |
| 299 | int slot, dev; | 299 | int slot, dev; |
| 300 | conf_t *conf = mddev_to_conf(r10_bio->mddev); | 300 | conf_t *conf = r10_bio->mddev->private; |
| 301 | 301 | ||
| 302 | for (slot = 0; slot < conf->copies; slot++) | 302 | for (slot = 0; slot < conf->copies; slot++) |
| 303 | if (r10_bio->devs[slot].bio == bio) | 303 | if (r10_bio->devs[slot].bio == bio) |
| @@ -461,7 +461,7 @@ static int raid10_mergeable_bvec(struct request_queue *q, | |||
| 461 | mddev_t *mddev = q->queuedata; | 461 | mddev_t *mddev = q->queuedata; |
| 462 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 462 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
| 463 | int max; | 463 | int max; |
| 464 | unsigned int chunk_sectors = mddev->chunk_size >> 9; | 464 | unsigned int chunk_sectors = mddev->chunk_sectors; |
| 465 | unsigned int bio_sectors = bvm->bi_size >> 9; | 465 | unsigned int bio_sectors = bvm->bi_size >> 9; |
| 466 | 466 | ||
| 467 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; | 467 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; |
| @@ -596,7 +596,7 @@ rb_out: | |||
| 596 | 596 | ||
| 597 | static void unplug_slaves(mddev_t *mddev) | 597 | static void unplug_slaves(mddev_t *mddev) |
| 598 | { | 598 | { |
| 599 | conf_t *conf = mddev_to_conf(mddev); | 599 | conf_t *conf = mddev->private; |
| 600 | int i; | 600 | int i; |
| 601 | 601 | ||
| 602 | rcu_read_lock(); | 602 | rcu_read_lock(); |
| @@ -628,7 +628,7 @@ static void raid10_unplug(struct request_queue *q) | |||
| 628 | static int raid10_congested(void *data, int bits) | 628 | static int raid10_congested(void *data, int bits) |
| 629 | { | 629 | { |
| 630 | mddev_t *mddev = data; | 630 | mddev_t *mddev = data; |
| 631 | conf_t *conf = mddev_to_conf(mddev); | 631 | conf_t *conf = mddev->private; |
| 632 | int i, ret = 0; | 632 | int i, ret = 0; |
| 633 | 633 | ||
| 634 | rcu_read_lock(); | 634 | rcu_read_lock(); |
| @@ -788,7 +788,7 @@ static void unfreeze_array(conf_t *conf) | |||
| 788 | static int make_request(struct request_queue *q, struct bio * bio) | 788 | static int make_request(struct request_queue *q, struct bio * bio) |
| 789 | { | 789 | { |
| 790 | mddev_t *mddev = q->queuedata; | 790 | mddev_t *mddev = q->queuedata; |
| 791 | conf_t *conf = mddev_to_conf(mddev); | 791 | conf_t *conf = mddev->private; |
| 792 | mirror_info_t *mirror; | 792 | mirror_info_t *mirror; |
| 793 | r10bio_t *r10_bio; | 793 | r10bio_t *r10_bio; |
| 794 | struct bio *read_bio; | 794 | struct bio *read_bio; |
| @@ -981,11 +981,11 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
| 981 | 981 | ||
| 982 | static void status(struct seq_file *seq, mddev_t *mddev) | 982 | static void status(struct seq_file *seq, mddev_t *mddev) |
| 983 | { | 983 | { |
| 984 | conf_t *conf = mddev_to_conf(mddev); | 984 | conf_t *conf = mddev->private; |
| 985 | int i; | 985 | int i; |
| 986 | 986 | ||
| 987 | if (conf->near_copies < conf->raid_disks) | 987 | if (conf->near_copies < conf->raid_disks) |
| 988 | seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); | 988 | seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2); |
| 989 | if (conf->near_copies > 1) | 989 | if (conf->near_copies > 1) |
| 990 | seq_printf(seq, " %d near-copies", conf->near_copies); | 990 | seq_printf(seq, " %d near-copies", conf->near_copies); |
| 991 | if (conf->far_copies > 1) { | 991 | if (conf->far_copies > 1) { |
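[annotation] The /proc/mdstat line keeps printing KiB; only the arithmetic changes, because the field is now counted in 512-byte sectors rather than bytes. A short sketch of the unit conversions this series leans on throughout (helper names are illustrative):

    /* Sketch: 1 sector = 512 bytes, so shift by 9 for bytes and
     * halve for KiB. */
    static inline unsigned int sectors_to_bytes(unsigned int s) { return s << 9; }
    static inline unsigned int sectors_to_kib(unsigned int s)   { return s >> 1; }
    static inline unsigned int bytes_to_sectors(unsigned int b) { return b >> 9; }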
| @@ -1006,7 +1006,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) | |||
| 1006 | static void error(mddev_t *mddev, mdk_rdev_t *rdev) | 1006 | static void error(mddev_t *mddev, mdk_rdev_t *rdev) |
| 1007 | { | 1007 | { |
| 1008 | char b[BDEVNAME_SIZE]; | 1008 | char b[BDEVNAME_SIZE]; |
| 1009 | conf_t *conf = mddev_to_conf(mddev); | 1009 | conf_t *conf = mddev->private; |
| 1010 | 1010 | ||
| 1011 | /* | 1011 | /* |
| 1012 | * If it is not operational, then we have already marked it as dead | 1012 | * If it is not operational, then we have already marked it as dead |
| @@ -1215,7 +1215,7 @@ abort: | |||
| 1215 | static void end_sync_read(struct bio *bio, int error) | 1215 | static void end_sync_read(struct bio *bio, int error) |
| 1216 | { | 1216 | { |
| 1217 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); | 1217 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); |
| 1218 | conf_t *conf = mddev_to_conf(r10_bio->mddev); | 1218 | conf_t *conf = r10_bio->mddev->private; |
| 1219 | int i,d; | 1219 | int i,d; |
| 1220 | 1220 | ||
| 1221 | for (i=0; i<conf->copies; i++) | 1221 | for (i=0; i<conf->copies; i++) |
| @@ -1253,7 +1253,7 @@ static void end_sync_write(struct bio *bio, int error) | |||
| 1253 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 1253 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
| 1254 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); | 1254 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); |
| 1255 | mddev_t *mddev = r10_bio->mddev; | 1255 | mddev_t *mddev = r10_bio->mddev; |
| 1256 | conf_t *conf = mddev_to_conf(mddev); | 1256 | conf_t *conf = mddev->private; |
| 1257 | int i,d; | 1257 | int i,d; |
| 1258 | 1258 | ||
| 1259 | for (i = 0; i < conf->copies; i++) | 1259 | for (i = 0; i < conf->copies; i++) |
| @@ -1300,7 +1300,7 @@ static void end_sync_write(struct bio *bio, int error) | |||
| 1300 | */ | 1300 | */ |
| 1301 | static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) | 1301 | static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) |
| 1302 | { | 1302 | { |
| 1303 | conf_t *conf = mddev_to_conf(mddev); | 1303 | conf_t *conf = mddev->private; |
| 1304 | int i, first; | 1304 | int i, first; |
| 1305 | struct bio *tbio, *fbio; | 1305 | struct bio *tbio, *fbio; |
| 1306 | 1306 | ||
| @@ -1400,7 +1400,7 @@ done: | |||
| 1400 | 1400 | ||
| 1401 | static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) | 1401 | static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) |
| 1402 | { | 1402 | { |
| 1403 | conf_t *conf = mddev_to_conf(mddev); | 1403 | conf_t *conf = mddev->private; |
| 1404 | int i, d; | 1404 | int i, d; |
| 1405 | struct bio *bio, *wbio; | 1405 | struct bio *bio, *wbio; |
| 1406 | 1406 | ||
| @@ -1549,7 +1549,7 @@ static void raid10d(mddev_t *mddev) | |||
| 1549 | r10bio_t *r10_bio; | 1549 | r10bio_t *r10_bio; |
| 1550 | struct bio *bio; | 1550 | struct bio *bio; |
| 1551 | unsigned long flags; | 1551 | unsigned long flags; |
| 1552 | conf_t *conf = mddev_to_conf(mddev); | 1552 | conf_t *conf = mddev->private; |
| 1553 | struct list_head *head = &conf->retry_list; | 1553 | struct list_head *head = &conf->retry_list; |
| 1554 | int unplug=0; | 1554 | int unplug=0; |
| 1555 | mdk_rdev_t *rdev; | 1555 | mdk_rdev_t *rdev; |
| @@ -1572,7 +1572,7 @@ static void raid10d(mddev_t *mddev) | |||
| 1572 | spin_unlock_irqrestore(&conf->device_lock, flags); | 1572 | spin_unlock_irqrestore(&conf->device_lock, flags); |
| 1573 | 1573 | ||
| 1574 | mddev = r10_bio->mddev; | 1574 | mddev = r10_bio->mddev; |
| 1575 | conf = mddev_to_conf(mddev); | 1575 | conf = mddev->private; |
| 1576 | if (test_bit(R10BIO_IsSync, &r10_bio->state)) { | 1576 | if (test_bit(R10BIO_IsSync, &r10_bio->state)) { |
| 1577 | sync_request_write(mddev, r10_bio); | 1577 | sync_request_write(mddev, r10_bio); |
| 1578 | unplug = 1; | 1578 | unplug = 1; |
| @@ -1680,7 +1680,7 @@ static int init_resync(conf_t *conf) | |||
| 1680 | 1680 | ||
| 1681 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) | 1681 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) |
| 1682 | { | 1682 | { |
| 1683 | conf_t *conf = mddev_to_conf(mddev); | 1683 | conf_t *conf = mddev->private; |
| 1684 | r10bio_t *r10_bio; | 1684 | r10bio_t *r10_bio; |
| 1685 | struct bio *biolist = NULL, *bio; | 1685 | struct bio *biolist = NULL, *bio; |
| 1686 | sector_t max_sector, nr_sectors; | 1686 | sector_t max_sector, nr_sectors; |
| @@ -2026,7 +2026,7 @@ static sector_t | |||
| 2026 | raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) | 2026 | raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) |
| 2027 | { | 2027 | { |
| 2028 | sector_t size; | 2028 | sector_t size; |
| 2029 | conf_t *conf = mddev_to_conf(mddev); | 2029 | conf_t *conf = mddev->private; |
| 2030 | 2030 | ||
| 2031 | if (!raid_disks) | 2031 | if (!raid_disks) |
| 2032 | raid_disks = mddev->raid_disks; | 2032 | raid_disks = mddev->raid_disks; |
| @@ -2050,9 +2050,10 @@ static int run(mddev_t *mddev) | |||
| 2050 | int nc, fc, fo; | 2050 | int nc, fc, fo; |
| 2051 | sector_t stride, size; | 2051 | sector_t stride, size; |
| 2052 | 2052 | ||
| 2053 | if (mddev->chunk_size < PAGE_SIZE) { | 2053 | if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || |
| 2054 | !is_power_of_2(mddev->chunk_sectors)) { | ||
| 2054 | printk(KERN_ERR "md/raid10: chunk size must be " | 2055 | printk(KERN_ERR "md/raid10: chunk size must be " |
| 2055 | "at least PAGE_SIZE(%ld).\n", PAGE_SIZE); | 2056 | "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); |
| 2056 | return -EINVAL; | 2057 | return -EINVAL; |
| 2057 | } | 2058 | } |
| 2058 | 2059 | ||
| @@ -2095,8 +2096,8 @@ static int run(mddev_t *mddev) | |||
| 2095 | conf->far_copies = fc; | 2096 | conf->far_copies = fc; |
| 2096 | conf->copies = nc*fc; | 2097 | conf->copies = nc*fc; |
| 2097 | conf->far_offset = fo; | 2098 | conf->far_offset = fo; |
| 2098 | conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; | 2099 | conf->chunk_mask = mddev->chunk_sectors - 1; |
| 2099 | conf->chunk_shift = ffz(~mddev->chunk_size) - 9; | 2100 | conf->chunk_shift = ffz(~mddev->chunk_sectors); |
| 2100 | size = mddev->dev_sectors >> conf->chunk_shift; | 2101 | size = mddev->dev_sectors >> conf->chunk_shift; |
| 2101 | sector_div(size, fc); | 2102 | sector_div(size, fc); |
| 2102 | size = size * conf->raid_disks; | 2103 | size = size * conf->raid_disks; |
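[annotation] ffz(~x) is the index of the lowest set bit of x, which for a power of two is exactly log2(x); with the chunk already in sectors, the old "- 9" byte-to-sector correction disappears. A portable sketch of the same computation, using a plain loop instead of the kernel's ffz():

    /* Sketch: chunk_shift == log2(chunk_sectors); only meaningful
     * for power-of-two chunks, e.g. 128 sectors (64 KiB) -> 7. */
    static int chunk_shift(unsigned int chunk_sectors)
    {
            int shift = 0;
            while ((1u << shift) < chunk_sectors)
                    shift++;
            return shift;
    }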
| @@ -2185,6 +2186,10 @@ static int run(mddev_t *mddev) | |||
| 2185 | goto out_free_conf; | 2186 | goto out_free_conf; |
| 2186 | } | 2187 | } |
| 2187 | 2188 | ||
| 2189 | if (mddev->recovery_cp != MaxSector) | ||
| 2190 | printk(KERN_NOTICE "raid10: %s is not clean" | ||
| 2191 | " -- starting background reconstruction\n", | ||
| 2192 | mdname(mddev)); | ||
| 2188 | printk(KERN_INFO | 2193 | printk(KERN_INFO |
| 2189 | "raid10: raid set %s active with %d out of %d devices\n", | 2194 | "raid10: raid set %s active with %d out of %d devices\n", |
| 2190 | mdname(mddev), mddev->raid_disks - mddev->degraded, | 2195 | mdname(mddev), mddev->raid_disks - mddev->degraded, |
| @@ -2204,7 +2209,8 @@ static int run(mddev_t *mddev) | |||
| 2204 | * maybe... | 2209 | * maybe... |
| 2205 | */ | 2210 | */ |
| 2206 | { | 2211 | { |
| 2207 | int stripe = conf->raid_disks * (mddev->chunk_size / PAGE_SIZE); | 2212 | int stripe = conf->raid_disks * |
| 2213 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); | ||
| 2208 | stripe /= conf->near_copies; | 2214 | stripe /= conf->near_copies; |
| 2209 | if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) | 2215 | if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) |
| 2210 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; | 2216 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; |
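[annotation] Read-ahead is sized so a sequential reader always has two full stripes in flight; since chunk_sectors counts sectors, it must be scaled back to bytes before dividing by the page size. A sketch of the computation (helper name illustrative; values such as 4 disks, 2 near-copies, 64 KiB chunks, 4 KiB pages would give 64 pages):

    /* Sketch: pages of readahead covering two full stripes. */
    static unsigned long ra_pages_for(int raid_disks, int near_copies,
                                      unsigned int chunk_sectors,
                                      unsigned long page_size)
    {
            unsigned long stripe = raid_disks * ((chunk_sectors << 9) / page_size);
            stripe /= near_copies;  /* near-copies shrink the unique span */
            return 2 * stripe;
    }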
| @@ -2227,7 +2233,7 @@ out: | |||
| 2227 | 2233 | ||
| 2228 | static int stop(mddev_t *mddev) | 2234 | static int stop(mddev_t *mddev) |
| 2229 | { | 2235 | { |
| 2230 | conf_t *conf = mddev_to_conf(mddev); | 2236 | conf_t *conf = mddev->private; |
| 2231 | 2237 | ||
| 2232 | raise_barrier(conf, 0); | 2238 | raise_barrier(conf, 0); |
| 2233 | lower_barrier(conf); | 2239 | lower_barrier(conf); |
| @@ -2245,7 +2251,7 @@ static int stop(mddev_t *mddev) | |||
| 2245 | 2251 | ||
| 2246 | static void raid10_quiesce(mddev_t *mddev, int state) | 2252 | static void raid10_quiesce(mddev_t *mddev, int state) |
| 2247 | { | 2253 | { |
| 2248 | conf_t *conf = mddev_to_conf(mddev); | 2254 | conf_t *conf = mddev->private; |
| 2249 | 2255 | ||
| 2250 | switch(state) { | 2256 | switch(state) { |
| 2251 | case 1: | 2257 | case 1: |
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 244dbe507a54..59cd1efb8d30 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
| @@ -62,12 +62,6 @@ struct r10_private_data_s { | |||
| 62 | typedef struct r10_private_data_s conf_t; | 62 | typedef struct r10_private_data_s conf_t; |
| 63 | 63 | ||
| 64 | /* | 64 | /* |
| 65 | * this is the only point in the RAID code where we violate | ||
| 66 | * C type safety. mddev->private is an 'opaque' pointer. | ||
| 67 | */ | ||
| 68 | #define mddev_to_conf(mddev) ((conf_t *) mddev->private) | ||
| 69 | |||
| 70 | /* | ||
| 71 | * this is our 'private' RAID10 bio. | 65 | * this is our 'private' RAID10 bio. |
| 72 | * | 66 | * |
| 73 | * it contains information about what kind of IO operations were started | 67 | * it contains information about what kind of IO operations were started |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index bef876698232..f9f991e6e138 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
| @@ -1274,8 +1274,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, | |||
| 1274 | sector_t new_sector; | 1274 | sector_t new_sector; |
| 1275 | int algorithm = previous ? conf->prev_algo | 1275 | int algorithm = previous ? conf->prev_algo |
| 1276 | : conf->algorithm; | 1276 | : conf->algorithm; |
| 1277 | int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) | 1277 | int sectors_per_chunk = previous ? conf->prev_chunk_sectors |
| 1278 | : (conf->chunk_size >> 9); | 1278 | : conf->chunk_sectors; |
| 1279 | int raid_disks = previous ? conf->previous_raid_disks | 1279 | int raid_disks = previous ? conf->previous_raid_disks |
| 1280 | : conf->raid_disks; | 1280 | : conf->raid_disks; |
| 1281 | int data_disks = raid_disks - conf->max_degraded; | 1281 | int data_disks = raid_disks - conf->max_degraded; |
| @@ -1480,8 +1480,8 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) | |||
| 1480 | int raid_disks = sh->disks; | 1480 | int raid_disks = sh->disks; |
| 1481 | int data_disks = raid_disks - conf->max_degraded; | 1481 | int data_disks = raid_disks - conf->max_degraded; |
| 1482 | sector_t new_sector = sh->sector, check; | 1482 | sector_t new_sector = sh->sector, check; |
| 1483 | int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) | 1483 | int sectors_per_chunk = previous ? conf->prev_chunk_sectors |
| 1484 | : (conf->chunk_size >> 9); | 1484 | : conf->chunk_sectors; |
| 1485 | int algorithm = previous ? conf->prev_algo | 1485 | int algorithm = previous ? conf->prev_algo |
| 1486 | : conf->algorithm; | 1486 | : conf->algorithm; |
| 1487 | sector_t stripe; | 1487 | sector_t stripe; |
| @@ -1997,8 +1997,7 @@ static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, | |||
| 1997 | struct stripe_head *sh) | 1997 | struct stripe_head *sh) |
| 1998 | { | 1998 | { |
| 1999 | int sectors_per_chunk = | 1999 | int sectors_per_chunk = |
| 2000 | previous ? (conf->prev_chunk >> 9) | 2000 | previous ? conf->prev_chunk_sectors : conf->chunk_sectors; |
| 2001 | : (conf->chunk_size >> 9); | ||
| 2002 | int dd_idx; | 2001 | int dd_idx; |
| 2003 | int chunk_offset = sector_div(stripe, sectors_per_chunk); | 2002 | int chunk_offset = sector_div(stripe, sectors_per_chunk); |
| 2004 | int disks = previous ? conf->previous_raid_disks : conf->raid_disks; | 2003 | int disks = previous ? conf->previous_raid_disks : conf->raid_disks; |
| @@ -3284,7 +3283,7 @@ static void activate_bit_delay(raid5_conf_t *conf) | |||
| 3284 | 3283 | ||
| 3285 | static void unplug_slaves(mddev_t *mddev) | 3284 | static void unplug_slaves(mddev_t *mddev) |
| 3286 | { | 3285 | { |
| 3287 | raid5_conf_t *conf = mddev_to_conf(mddev); | 3286 | raid5_conf_t *conf = mddev->private; |
| 3288 | int i; | 3287 | int i; |
| 3289 | 3288 | ||
| 3290 | rcu_read_lock(); | 3289 | rcu_read_lock(); |
| @@ -3308,7 +3307,7 @@ static void unplug_slaves(mddev_t *mddev) | |||
| 3308 | static void raid5_unplug_device(struct request_queue *q) | 3307 | static void raid5_unplug_device(struct request_queue *q) |
| 3309 | { | 3308 | { |
| 3310 | mddev_t *mddev = q->queuedata; | 3309 | mddev_t *mddev = q->queuedata; |
| 3311 | raid5_conf_t *conf = mddev_to_conf(mddev); | 3310 | raid5_conf_t *conf = mddev->private; |
| 3312 | unsigned long flags; | 3311 | unsigned long flags; |
| 3313 | 3312 | ||
| 3314 | spin_lock_irqsave(&conf->device_lock, flags); | 3313 | spin_lock_irqsave(&conf->device_lock, flags); |
| @@ -3327,7 +3326,7 @@ static void raid5_unplug_device(struct request_queue *q) | |||
| 3327 | static int raid5_congested(void *data, int bits) | 3326 | static int raid5_congested(void *data, int bits) |
| 3328 | { | 3327 | { |
| 3329 | mddev_t *mddev = data; | 3328 | mddev_t *mddev = data; |
| 3330 | raid5_conf_t *conf = mddev_to_conf(mddev); | 3329 | raid5_conf_t *conf = mddev->private; |
| 3331 | 3330 | ||
| 3332 | /* No difference between reads and writes. Just check | 3331 | /* No difference between reads and writes. Just check |
| 3333 | * how busy the stripe_cache is | 3332 | * how busy the stripe_cache is |
| @@ -3352,14 +3351,14 @@ static int raid5_mergeable_bvec(struct request_queue *q, | |||
| 3352 | mddev_t *mddev = q->queuedata; | 3351 | mddev_t *mddev = q->queuedata; |
| 3353 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 3352 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
| 3354 | int max; | 3353 | int max; |
| 3355 | unsigned int chunk_sectors = mddev->chunk_size >> 9; | 3354 | unsigned int chunk_sectors = mddev->chunk_sectors; |
| 3356 | unsigned int bio_sectors = bvm->bi_size >> 9; | 3355 | unsigned int bio_sectors = bvm->bi_size >> 9; |
| 3357 | 3356 | ||
| 3358 | if ((bvm->bi_rw & 1) == WRITE) | 3357 | if ((bvm->bi_rw & 1) == WRITE) |
| 3359 | return biovec->bv_len; /* always allow writes to be mergeable */ | 3358 | return biovec->bv_len; /* always allow writes to be mergeable */ |
| 3360 | 3359 | ||
| 3361 | if (mddev->new_chunk < mddev->chunk_size) | 3360 | if (mddev->new_chunk_sectors < mddev->chunk_sectors) |
| 3362 | chunk_sectors = mddev->new_chunk >> 9; | 3361 | chunk_sectors = mddev->new_chunk_sectors; |
| 3363 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; | 3362 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; |
| 3364 | if (max < 0) max = 0; | 3363 | if (max < 0) max = 0; |
| 3365 | if (max <= biovec->bv_len && bio_sectors == 0) | 3364 | if (max <= biovec->bv_len && bio_sectors == 0) |
| @@ -3372,11 +3371,11 @@ static int raid5_mergeable_bvec(struct request_queue *q, | |||
| 3372 | static int in_chunk_boundary(mddev_t *mddev, struct bio *bio) | 3371 | static int in_chunk_boundary(mddev_t *mddev, struct bio *bio) |
| 3373 | { | 3372 | { |
| 3374 | sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); | 3373 | sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); |
| 3375 | unsigned int chunk_sectors = mddev->chunk_size >> 9; | 3374 | unsigned int chunk_sectors = mddev->chunk_sectors; |
| 3376 | unsigned int bio_sectors = bio->bi_size >> 9; | 3375 | unsigned int bio_sectors = bio->bi_size >> 9; |
| 3377 | 3376 | ||
| 3378 | if (mddev->new_chunk < mddev->chunk_size) | 3377 | if (mddev->new_chunk_sectors < mddev->chunk_sectors) |
| 3379 | chunk_sectors = mddev->new_chunk >> 9; | 3378 | chunk_sectors = mddev->new_chunk_sectors; |
| 3380 | return chunk_sectors >= | 3379 | return chunk_sectors >= |
| 3381 | ((sector & (chunk_sectors - 1)) + bio_sectors); | 3380 | ((sector & (chunk_sectors - 1)) + bio_sectors); |
| 3382 | } | 3381 | } |
| @@ -3440,7 +3439,7 @@ static void raid5_align_endio(struct bio *bi, int error) | |||
| 3440 | bio_put(bi); | 3439 | bio_put(bi); |
| 3441 | 3440 | ||
| 3442 | mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata; | 3441 | mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata; |
| 3443 | conf = mddev_to_conf(mddev); | 3442 | conf = mddev->private; |
| 3444 | rdev = (void*)raid_bi->bi_next; | 3443 | rdev = (void*)raid_bi->bi_next; |
| 3445 | raid_bi->bi_next = NULL; | 3444 | raid_bi->bi_next = NULL; |
| 3446 | 3445 | ||
| @@ -3482,7 +3481,7 @@ static int bio_fits_rdev(struct bio *bi) | |||
| 3482 | static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) | 3481 | static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) |
| 3483 | { | 3482 | { |
| 3484 | mddev_t *mddev = q->queuedata; | 3483 | mddev_t *mddev = q->queuedata; |
| 3485 | raid5_conf_t *conf = mddev_to_conf(mddev); | 3484 | raid5_conf_t *conf = mddev->private; |
| 3486 | unsigned int dd_idx; | 3485 | unsigned int dd_idx; |
| 3487 | struct bio* align_bi; | 3486 | struct bio* align_bi; |
| 3488 | mdk_rdev_t *rdev; | 3487 | mdk_rdev_t *rdev; |
| @@ -3599,7 +3598,7 @@ static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf) | |||
| 3599 | static int make_request(struct request_queue *q, struct bio * bi) | 3598 | static int make_request(struct request_queue *q, struct bio * bi) |
| 3600 | { | 3599 | { |
| 3601 | mddev_t *mddev = q->queuedata; | 3600 | mddev_t *mddev = q->queuedata; |
| 3602 | raid5_conf_t *conf = mddev_to_conf(mddev); | 3601 | raid5_conf_t *conf = mddev->private; |
| 3603 | int dd_idx; | 3602 | int dd_idx; |
| 3604 | sector_t new_sector; | 3603 | sector_t new_sector; |
| 3605 | sector_t logical_sector, last_sector; | 3604 | sector_t logical_sector, last_sector; |
| @@ -3696,6 +3695,7 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
| 3696 | spin_unlock_irq(&conf->device_lock); | 3695 | spin_unlock_irq(&conf->device_lock); |
| 3697 | if (must_retry) { | 3696 | if (must_retry) { |
| 3698 | release_stripe(sh); | 3697 | release_stripe(sh); |
| 3698 | schedule(); | ||
| 3699 | goto retry; | 3699 | goto retry; |
| 3700 | } | 3700 | } |
| 3701 | } | 3701 | } |
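[annotation] The added schedule() matters because the surrounding retry loop is waiting on a condition: prepare_to_wait() only marks the task as about to sleep, and without schedule() the code would jump straight back to the retry label without ever yielding the CPU. The canonical idiom is sketched below; wq and condition are placeholders, not names from raid5.c:

    /* Sketch of the prepare_to_wait()/schedule()/finish_wait() idiom. */
    DEFINE_WAIT(w);
    for (;;) {
            prepare_to_wait(&wq, &w, TASK_UNINTERRUPTIBLE);
            if (condition)
                    break;
            schedule();     /* actually sleep until woken */
    }
    finish_wait(&wq, &w);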
| @@ -3791,10 +3791,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
| 3791 | * If old and new chunk sizes differ, we need to process the | 3791 | * If old and new chunk sizes differ, we need to process the |
| 3792 | * largest of these | 3792 | * largest of these |
| 3793 | */ | 3793 | */ |
| 3794 | if (mddev->new_chunk > mddev->chunk_size) | 3794 | if (mddev->new_chunk_sectors > mddev->chunk_sectors) |
| 3795 | reshape_sectors = mddev->new_chunk / 512; | 3795 | reshape_sectors = mddev->new_chunk_sectors; |
| 3796 | else | 3796 | else |
| 3797 | reshape_sectors = mddev->chunk_size / 512; | 3797 | reshape_sectors = mddev->chunk_sectors; |
| 3798 | 3798 | ||
| 3799 | /* we update the metadata when there is more than 3Meg | 3799 | /* we update the metadata when there is more than 3Meg |
| 3800 | * in the block range (that is rather arbitrary, should | 3800 | * in the block range (that is rather arbitrary, should |
| @@ -3917,7 +3917,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
| 3917 | 1, &dd_idx, NULL); | 3917 | 1, &dd_idx, NULL); |
| 3918 | last_sector = | 3918 | last_sector = |
| 3919 | raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) | 3919 | raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) |
| 3920 | *(new_data_disks) - 1), | 3920 | * new_data_disks - 1), |
| 3921 | 1, &dd_idx, NULL); | 3921 | 1, &dd_idx, NULL); |
| 3922 | if (last_sector >= mddev->dev_sectors) | 3922 | if (last_sector >= mddev->dev_sectors) |
| 3923 | last_sector = mddev->dev_sectors - 1; | 3923 | last_sector = mddev->dev_sectors - 1; |
| @@ -3946,7 +3946,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
| 3946 | wait_event(conf->wait_for_overlap, | 3946 | wait_event(conf->wait_for_overlap, |
| 3947 | atomic_read(&conf->reshape_stripes) == 0); | 3947 | atomic_read(&conf->reshape_stripes) == 0); |
| 3948 | mddev->reshape_position = conf->reshape_progress; | 3948 | mddev->reshape_position = conf->reshape_progress; |
| 3949 | mddev->curr_resync_completed = mddev->curr_resync; | 3949 | mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors; |
| 3950 | conf->reshape_checkpoint = jiffies; | 3950 | conf->reshape_checkpoint = jiffies; |
| 3951 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 3951 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
| 3952 | md_wakeup_thread(mddev->thread); | 3952 | md_wakeup_thread(mddev->thread); |
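[annotation] At this checkpoint every stripe of the current window has drained (reshape_stripes reached zero), so the completed mark may include the reshape_sectors just processed; the old assignment left curr_resync_completed one window behind. A one-line sketch of the accounting, using the kernel's sector_t:

    /* Sketch: once the in-flight window starting at curr_resync has
     * drained, completion covers that whole window. */
    static sector_t checkpoint_after_window(sector_t curr_resync,
                                            sector_t reshape_sectors)
    {
            return curr_resync + reshape_sectors;
    }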
| @@ -4129,7 +4129,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
| 4129 | static void raid5d(mddev_t *mddev) | 4129 | static void raid5d(mddev_t *mddev) |
| 4130 | { | 4130 | { |
| 4131 | struct stripe_head *sh; | 4131 | struct stripe_head *sh; |
| 4132 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4132 | raid5_conf_t *conf = mddev->private; |
| 4133 | int handled; | 4133 | int handled; |
| 4134 | 4134 | ||
| 4135 | pr_debug("+++ raid5d active\n"); | 4135 | pr_debug("+++ raid5d active\n"); |
| @@ -4185,7 +4185,7 @@ static void raid5d(mddev_t *mddev) | |||
| 4185 | static ssize_t | 4185 | static ssize_t |
| 4186 | raid5_show_stripe_cache_size(mddev_t *mddev, char *page) | 4186 | raid5_show_stripe_cache_size(mddev_t *mddev, char *page) |
| 4187 | { | 4187 | { |
| 4188 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4188 | raid5_conf_t *conf = mddev->private; |
| 4189 | if (conf) | 4189 | if (conf) |
| 4190 | return sprintf(page, "%d\n", conf->max_nr_stripes); | 4190 | return sprintf(page, "%d\n", conf->max_nr_stripes); |
| 4191 | else | 4191 | else |
| @@ -4195,7 +4195,7 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page) | |||
| 4195 | static ssize_t | 4195 | static ssize_t |
| 4196 | raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | 4196 | raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) |
| 4197 | { | 4197 | { |
| 4198 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4198 | raid5_conf_t *conf = mddev->private; |
| 4199 | unsigned long new; | 4199 | unsigned long new; |
| 4200 | int err; | 4200 | int err; |
| 4201 | 4201 | ||
| @@ -4233,7 +4233,7 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR, | |||
| 4233 | static ssize_t | 4233 | static ssize_t |
| 4234 | raid5_show_preread_threshold(mddev_t *mddev, char *page) | 4234 | raid5_show_preread_threshold(mddev_t *mddev, char *page) |
| 4235 | { | 4235 | { |
| 4236 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4236 | raid5_conf_t *conf = mddev->private; |
| 4237 | if (conf) | 4237 | if (conf) |
| 4238 | return sprintf(page, "%d\n", conf->bypass_threshold); | 4238 | return sprintf(page, "%d\n", conf->bypass_threshold); |
| 4239 | else | 4239 | else |
| @@ -4243,7 +4243,7 @@ raid5_show_preread_threshold(mddev_t *mddev, char *page) | |||
| 4243 | static ssize_t | 4243 | static ssize_t |
| 4244 | raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len) | 4244 | raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len) |
| 4245 | { | 4245 | { |
| 4246 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4246 | raid5_conf_t *conf = mddev->private; |
| 4247 | unsigned long new; | 4247 | unsigned long new; |
| 4248 | if (len >= PAGE_SIZE) | 4248 | if (len >= PAGE_SIZE) |
| 4249 | return -EINVAL; | 4249 | return -EINVAL; |
| @@ -4267,7 +4267,7 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, | |||
| 4267 | static ssize_t | 4267 | static ssize_t |
| 4268 | stripe_cache_active_show(mddev_t *mddev, char *page) | 4268 | stripe_cache_active_show(mddev_t *mddev, char *page) |
| 4269 | { | 4269 | { |
| 4270 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4270 | raid5_conf_t *conf = mddev->private; |
| 4271 | if (conf) | 4271 | if (conf) |
| 4272 | return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); | 4272 | return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); |
| 4273 | else | 4273 | else |
| @@ -4291,7 +4291,7 @@ static struct attribute_group raid5_attrs_group = { | |||
| 4291 | static sector_t | 4291 | static sector_t |
| 4292 | raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) | 4292 | raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) |
| 4293 | { | 4293 | { |
| 4294 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4294 | raid5_conf_t *conf = mddev->private; |
| 4295 | 4295 | ||
| 4296 | if (!sectors) | 4296 | if (!sectors) |
| 4297 | sectors = mddev->dev_sectors; | 4297 | sectors = mddev->dev_sectors; |
| @@ -4303,8 +4303,8 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
| 4303 | raid_disks = conf->previous_raid_disks; | 4303 | raid_disks = conf->previous_raid_disks; |
| 4304 | } | 4304 | } |
| 4305 | 4305 | ||
| 4306 | sectors &= ~((sector_t)mddev->chunk_size/512 - 1); | 4306 | sectors &= ~((sector_t)mddev->chunk_sectors - 1); |
| 4307 | sectors &= ~((sector_t)mddev->new_chunk/512 - 1); | 4307 | sectors &= ~((sector_t)mddev->new_chunk_sectors - 1); |
| 4308 | return sectors * (raid_disks - conf->max_degraded); | 4308 | return sectors * (raid_disks - conf->max_degraded); |
| 4309 | } | 4309 | } |
| 4310 | 4310 | ||
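[annotation] Both masks round the per-device size down to a whole number of chunks, once for the current and once for the pending geometry, so the exported array size stays valid on either side of a reshape. A minimal sketch of the rounding, assuming a power-of-two chunk:

    /* Sketch: drop the partial trailing chunk; the 64-bit cast keeps
     * the mask wide enough for large devices. */
    static unsigned long long round_down_to_chunk(unsigned long long sectors,
                                                  unsigned int chunk_sectors)
    {
            return sectors & ~((unsigned long long)chunk_sectors - 1);
    }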
| @@ -4336,9 +4336,11 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
| 4336 | return ERR_PTR(-EINVAL); | 4336 | return ERR_PTR(-EINVAL); |
| 4337 | } | 4337 | } |
| 4338 | 4338 | ||
| 4339 | if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) { | 4339 | if (!mddev->new_chunk_sectors || |
| 4340 | (mddev->new_chunk_sectors << 9) % PAGE_SIZE || | ||
| 4341 | !is_power_of_2(mddev->new_chunk_sectors)) { | ||
| 4340 | printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", | 4342 | printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", |
| 4341 | mddev->new_chunk, mdname(mddev)); | 4343 | mddev->new_chunk_sectors << 9, mdname(mddev)); |
| 4342 | return ERR_PTR(-EINVAL); | 4344 | return ERR_PTR(-EINVAL); |
| 4343 | } | 4345 | } |
| 4344 | 4346 | ||
| @@ -4401,7 +4403,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
| 4401 | conf->fullsync = 1; | 4403 | conf->fullsync = 1; |
| 4402 | } | 4404 | } |
| 4403 | 4405 | ||
| 4404 | conf->chunk_size = mddev->new_chunk; | 4406 | conf->chunk_sectors = mddev->new_chunk_sectors; |
| 4405 | conf->level = mddev->new_level; | 4407 | conf->level = mddev->new_level; |
| 4406 | if (conf->level == 6) | 4408 | if (conf->level == 6) |
| 4407 | conf->max_degraded = 2; | 4409 | conf->max_degraded = 2; |
| @@ -4411,7 +4413,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
| 4411 | conf->max_nr_stripes = NR_STRIPES; | 4413 | conf->max_nr_stripes = NR_STRIPES; |
| 4412 | conf->reshape_progress = mddev->reshape_position; | 4414 | conf->reshape_progress = mddev->reshape_position; |
| 4413 | if (conf->reshape_progress != MaxSector) { | 4415 | if (conf->reshape_progress != MaxSector) { |
| 4414 | conf->prev_chunk = mddev->chunk_size; | 4416 | conf->prev_chunk_sectors = mddev->chunk_sectors; |
| 4415 | conf->prev_algo = mddev->layout; | 4417 | conf->prev_algo = mddev->layout; |
| 4416 | } | 4418 | } |
| 4417 | 4419 | ||
| @@ -4453,6 +4455,10 @@ static int run(mddev_t *mddev) | |||
| 4453 | int working_disks = 0; | 4455 | int working_disks = 0; |
| 4454 | mdk_rdev_t *rdev; | 4456 | mdk_rdev_t *rdev; |
| 4455 | 4457 | ||
| 4458 | if (mddev->recovery_cp != MaxSector) | ||
| 4459 | printk(KERN_NOTICE "raid5: %s is not clean" | ||
| 4460 | " -- starting background reconstruction\n", | ||
| 4461 | mdname(mddev)); | ||
| 4456 | if (mddev->reshape_position != MaxSector) { | 4462 | if (mddev->reshape_position != MaxSector) { |
| 4457 | /* Check that we can continue the reshape. | 4463 | /* Check that we can continue the reshape. |
| 4458 | * Currently only disks can change, it must | 4464 | * Currently only disks can change, it must |
| @@ -4475,7 +4481,7 @@ static int run(mddev_t *mddev) | |||
| 4475 | * geometry. | 4481 | * geometry. |
| 4476 | */ | 4482 | */ |
| 4477 | here_new = mddev->reshape_position; | 4483 | here_new = mddev->reshape_position; |
| 4478 | if (sector_div(here_new, (mddev->new_chunk>>9)* | 4484 | if (sector_div(here_new, mddev->new_chunk_sectors * |
| 4479 | (mddev->raid_disks - max_degraded))) { | 4485 | (mddev->raid_disks - max_degraded))) { |
| 4480 | printk(KERN_ERR "raid5: reshape_position not " | 4486 | printk(KERN_ERR "raid5: reshape_position not " |
| 4481 | "on a stripe boundary\n"); | 4487 | "on a stripe boundary\n"); |
| @@ -4483,7 +4489,7 @@ static int run(mddev_t *mddev) | |||
| 4483 | } | 4489 | } |
| 4484 | /* here_new is the stripe we will write to */ | 4490 | /* here_new is the stripe we will write to */ |
| 4485 | here_old = mddev->reshape_position; | 4491 | here_old = mddev->reshape_position; |
| 4486 | sector_div(here_old, (mddev->chunk_size>>9)* | 4492 | sector_div(here_old, mddev->chunk_sectors * |
| 4487 | (old_disks-max_degraded)); | 4493 | (old_disks-max_degraded)); |
| 4488 | /* here_old is the first stripe that we might need to read | 4494 | /* here_old is the first stripe that we might need to read |
| 4489 | * from */ | 4495 | * from */ |
| @@ -4498,7 +4504,7 @@ static int run(mddev_t *mddev) | |||
| 4498 | } else { | 4504 | } else { |
| 4499 | BUG_ON(mddev->level != mddev->new_level); | 4505 | BUG_ON(mddev->level != mddev->new_level); |
| 4500 | BUG_ON(mddev->layout != mddev->new_layout); | 4506 | BUG_ON(mddev->layout != mddev->new_layout); |
| 4501 | BUG_ON(mddev->chunk_size != mddev->new_chunk); | 4507 | BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors); |
| 4502 | BUG_ON(mddev->delta_disks != 0); | 4508 | BUG_ON(mddev->delta_disks != 0); |
| 4503 | } | 4509 | } |
| 4504 | 4510 | ||
| @@ -4532,7 +4538,7 @@ static int run(mddev_t *mddev) | |||
| 4532 | } | 4538 | } |
| 4533 | 4539 | ||
| 4534 | /* device size must be a multiple of chunk size */ | 4540 | /* device size must be a multiple of chunk size */ |
| 4535 | mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1); | 4541 | mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); |
| 4536 | mddev->resync_max_sectors = mddev->dev_sectors; | 4542 | mddev->resync_max_sectors = mddev->dev_sectors; |
| 4537 | 4543 | ||
| 4538 | if (mddev->degraded > 0 && | 4544 | if (mddev->degraded > 0 && |
| @@ -4581,7 +4587,7 @@ static int run(mddev_t *mddev) | |||
| 4581 | { | 4587 | { |
| 4582 | int data_disks = conf->previous_raid_disks - conf->max_degraded; | 4588 | int data_disks = conf->previous_raid_disks - conf->max_degraded; |
| 4583 | int stripe = data_disks * | 4589 | int stripe = data_disks * |
| 4584 | (mddev->chunk_size / PAGE_SIZE); | 4590 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); |
| 4585 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | 4591 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) |
| 4586 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | 4592 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; |
| 4587 | } | 4593 | } |
| @@ -4678,7 +4684,8 @@ static void status(struct seq_file *seq, mddev_t *mddev) | |||
| 4678 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | 4684 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; |
| 4679 | int i; | 4685 | int i; |
| 4680 | 4686 | ||
| 4681 | seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout); | 4687 | seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level, |
| 4688 | mddev->chunk_sectors / 2, mddev->layout); | ||
| 4682 | seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); | 4689 | seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); |
| 4683 | for (i = 0; i < conf->raid_disks; i++) | 4690 | for (i = 0; i < conf->raid_disks; i++) |
| 4684 | seq_printf (seq, "%s", | 4691 | seq_printf (seq, "%s", |
| @@ -4826,7 +4833,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
| 4826 | * any io in the removed space completes, but it hardly seems | 4833 | * any io in the removed space completes, but it hardly seems |
| 4827 | * worth it. | 4834 | * worth it. |
| 4828 | */ | 4835 | */ |
| 4829 | sectors &= ~((sector_t)mddev->chunk_size/512 - 1); | 4836 | sectors &= ~((sector_t)mddev->chunk_sectors - 1); |
| 4830 | md_set_array_sectors(mddev, raid5_size(mddev, sectors, | 4837 | md_set_array_sectors(mddev, raid5_size(mddev, sectors, |
| 4831 | mddev->raid_disks)); | 4838 | mddev->raid_disks)); |
| 4832 | if (mddev->array_sectors > | 4839 | if (mddev->array_sectors > |
| @@ -4843,14 +4850,37 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
| 4843 | return 0; | 4850 | return 0; |
| 4844 | } | 4851 | } |
| 4845 | 4852 | ||
| 4846 | static int raid5_check_reshape(mddev_t *mddev) | 4853 | static int check_stripe_cache(mddev_t *mddev) |
| 4847 | { | 4854 | { |
| 4848 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4855 | /* Can only proceed if there are plenty of stripe_heads. |
| 4856 | * We need a minimum of one full stripe, and for sensible progress | ||
| 4857 | * it is best to have about 4 times that. | ||
| 4858 | * If we require 4 times, then the default 256 4K stripe_heads will | ||
| 4859 | * allow for chunk sizes up to 256K, which is probably OK. | ||
| 4860 | * If the chunk size is greater, user-space should request more | ||
| 4861 | * stripe_heads first. | ||
| 4862 | */ | ||
| 4863 | raid5_conf_t *conf = mddev->private; | ||
| 4864 | if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4 | ||
| 4865 | > conf->max_nr_stripes || | ||
| 4866 | ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4 | ||
| 4867 | > conf->max_nr_stripes) { | ||
| 4868 | printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", | ||
| 4869 | ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9) | ||
| 4870 | / STRIPE_SIZE)*4); | ||
| 4871 | return 0; | ||
| 4872 | } | ||
| 4873 | return 1; | ||
| 4874 | } | ||
| 4875 | |||
| 4876 | static int check_reshape(mddev_t *mddev) | ||
| 4877 | { | ||
| 4878 | raid5_conf_t *conf = mddev->private; | ||
| 4849 | 4879 | ||
| 4850 | if (mddev->delta_disks == 0 && | 4880 | if (mddev->delta_disks == 0 && |
| 4851 | mddev->new_layout == mddev->layout && | 4881 | mddev->new_layout == mddev->layout && |
| 4852 | mddev->new_chunk == mddev->chunk_size) | 4882 | mddev->new_chunk_sectors == mddev->chunk_sectors) |
| 4853 | return -EINVAL; /* nothing to do */ | 4883 | return 0; /* nothing to do */ |
| 4854 | if (mddev->bitmap) | 4884 | if (mddev->bitmap) |
| 4855 | /* Cannot grow a bitmap yet */ | 4885 | /* Cannot grow a bitmap yet */ |
| 4856 | return -EBUSY; | 4886 | return -EBUSY; |
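[annotation] Pulling the stripe-cache test into the check_stripe_cache() helper above lets both check_reshape() and raid5_start_reshape() refuse a reshape the cache cannot feed: the rule of thumb is four chunks' worth of stripe_heads, for whichever of the old and new chunk sizes is larger. A sketch with STRIPE_SIZE assumed to be 4 KiB:

    /* Sketch: is max_nr_stripes enough for four full chunks? */
    static int cache_big_enough(unsigned int chunk_sectors,
                                unsigned int new_chunk_sectors,
                                int max_nr_stripes)
    {
            unsigned int worst = chunk_sectors > new_chunk_sectors
                                 ? chunk_sectors : new_chunk_sectors;
            return ((worst << 9) / 4096) * 4 <= max_nr_stripes;
    }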
| @@ -4869,28 +4899,15 @@ static int raid5_check_reshape(mddev_t *mddev) | |||
| 4869 | return -EINVAL; | 4899 | return -EINVAL; |
| 4870 | } | 4900 | } |
| 4871 | 4901 | ||
| 4872 | /* Can only proceed if there are plenty of stripe_heads. | 4902 | if (!check_stripe_cache(mddev)) |
| 4873 | * We need a minimum of one full stripe, and for sensible progress | ||
| 4874 | * it is best to have about 4 times that. | ||
| 4875 | * If we require 4 times, then the default 256 4K stripe_heads will | ||
| 4876 | * allow for chunk sizes up to 256K, which is probably OK. | ||
| 4877 | * If the chunk size is greater, user-space should request more | ||
| 4878 | * stripe_heads first. | ||
| 4879 | */ | ||
| 4880 | if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes || | ||
| 4881 | (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) { | ||
| 4882 | printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", | ||
| 4883 | (max(mddev->chunk_size, mddev->new_chunk) | ||
| 4884 | / STRIPE_SIZE)*4); | ||
| 4885 | return -ENOSPC; | 4903 | return -ENOSPC; |
| 4886 | } | ||
| 4887 | 4904 | ||
| 4888 | return resize_stripes(conf, conf->raid_disks + mddev->delta_disks); | 4905 | return resize_stripes(conf, conf->raid_disks + mddev->delta_disks); |
| 4889 | } | 4906 | } |
| 4890 | 4907 | ||
| 4891 | static int raid5_start_reshape(mddev_t *mddev) | 4908 | static int raid5_start_reshape(mddev_t *mddev) |
| 4892 | { | 4909 | { |
| 4893 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4910 | raid5_conf_t *conf = mddev->private; |
| 4894 | mdk_rdev_t *rdev; | 4911 | mdk_rdev_t *rdev; |
| 4895 | int spares = 0; | 4912 | int spares = 0; |
| 4896 | int added_devices = 0; | 4913 | int added_devices = 0; |
| @@ -4899,6 +4916,9 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
| 4899 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | 4916 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) |
| 4900 | return -EBUSY; | 4917 | return -EBUSY; |
| 4901 | 4918 | ||
| 4919 | if (!check_stripe_cache(mddev)) | ||
| 4920 | return -ENOSPC; | ||
| 4921 | |||
| 4902 | list_for_each_entry(rdev, &mddev->disks, same_set) | 4922 | list_for_each_entry(rdev, &mddev->disks, same_set) |
| 4903 | if (rdev->raid_disk < 0 && | 4923 | if (rdev->raid_disk < 0 && |
| 4904 | !test_bit(Faulty, &rdev->flags)) | 4924 | !test_bit(Faulty, &rdev->flags)) |
| @@ -4925,8 +4945,8 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
| 4925 | spin_lock_irq(&conf->device_lock); | 4945 | spin_lock_irq(&conf->device_lock); |
| 4926 | conf->previous_raid_disks = conf->raid_disks; | 4946 | conf->previous_raid_disks = conf->raid_disks; |
| 4927 | conf->raid_disks += mddev->delta_disks; | 4947 | conf->raid_disks += mddev->delta_disks; |
| 4928 | conf->prev_chunk = conf->chunk_size; | 4948 | conf->prev_chunk_sectors = conf->chunk_sectors; |
| 4929 | conf->chunk_size = mddev->new_chunk; | 4949 | conf->chunk_sectors = mddev->new_chunk_sectors; |
| 4930 | conf->prev_algo = conf->algorithm; | 4950 | conf->prev_algo = conf->algorithm; |
| 4931 | conf->algorithm = mddev->new_layout; | 4951 | conf->algorithm = mddev->new_layout; |
| 4932 | if (mddev->delta_disks < 0) | 4952 | if (mddev->delta_disks < 0) |
| @@ -5008,7 +5028,7 @@ static void end_reshape(raid5_conf_t *conf) | |||
| 5008 | */ | 5028 | */ |
| 5009 | { | 5029 | { |
| 5010 | int data_disks = conf->raid_disks - conf->max_degraded; | 5030 | int data_disks = conf->raid_disks - conf->max_degraded; |
| 5011 | int stripe = data_disks * (conf->chunk_size | 5031 | int stripe = data_disks * ((conf->chunk_sectors << 9) |
| 5012 | / PAGE_SIZE); | 5032 | / PAGE_SIZE); |
| 5013 | if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | 5033 | if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) |
| 5014 | conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | 5034 | conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; |
| @@ -5022,7 +5042,7 @@ static void end_reshape(raid5_conf_t *conf) | |||
| 5022 | static void raid5_finish_reshape(mddev_t *mddev) | 5042 | static void raid5_finish_reshape(mddev_t *mddev) |
| 5023 | { | 5043 | { |
| 5024 | struct block_device *bdev; | 5044 | struct block_device *bdev; |
| 5025 | raid5_conf_t *conf = mddev_to_conf(mddev); | 5045 | raid5_conf_t *conf = mddev->private; |
| 5026 | 5046 | ||
| 5027 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 5047 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
| 5028 | 5048 | ||
| @@ -5053,7 +5073,7 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
| 5053 | raid5_remove_disk(mddev, d); | 5073 | raid5_remove_disk(mddev, d); |
| 5054 | } | 5074 | } |
| 5055 | mddev->layout = conf->algorithm; | 5075 | mddev->layout = conf->algorithm; |
| 5056 | mddev->chunk_size = conf->chunk_size; | 5076 | mddev->chunk_sectors = conf->chunk_sectors; |
| 5057 | mddev->reshape_position = MaxSector; | 5077 | mddev->reshape_position = MaxSector; |
| 5058 | mddev->delta_disks = 0; | 5078 | mddev->delta_disks = 0; |
| 5059 | } | 5079 | } |
| @@ -5061,7 +5081,7 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
| 5061 | 5081 | ||
| 5062 | static void raid5_quiesce(mddev_t *mddev, int state) | 5082 | static void raid5_quiesce(mddev_t *mddev, int state) |
| 5063 | { | 5083 | { |
| 5064 | raid5_conf_t *conf = mddev_to_conf(mddev); | 5084 | raid5_conf_t *conf = mddev->private; |
| 5065 | 5085 | ||
| 5066 | switch(state) { | 5086 | switch(state) { |
| 5067 | case 2: /* resume for a suspend */ | 5087 | case 2: /* resume for a suspend */ |
| @@ -5111,7 +5131,7 @@ static void *raid5_takeover_raid1(mddev_t *mddev) | |||
| 5111 | 5131 | ||
| 5112 | mddev->new_level = 5; | 5132 | mddev->new_level = 5; |
| 5113 | mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; | 5133 | mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; |
| 5114 | mddev->new_chunk = chunksect << 9; | 5134 | mddev->new_chunk_sectors = chunksect; |
| 5115 | 5135 | ||
| 5116 | return setup_conf(mddev); | 5136 | return setup_conf(mddev); |
| 5117 | } | 5137 | } |
| @@ -5150,24 +5170,24 @@ static void *raid5_takeover_raid6(mddev_t *mddev) | |||
| 5150 | } | 5170 | } |
| 5151 | 5171 | ||
| 5152 | 5172 | ||
| 5153 | static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) | 5173 | static int raid5_check_reshape(mddev_t *mddev) |
| 5154 | { | 5174 | { |
| 5155 | /* For a 2-drive array, the layout and chunk size can be changed | 5175 | /* For a 2-drive array, the layout and chunk size can be changed |
| 5156 | * immediately as no restriping is needed. | 5176 | * immediately as no restriping is needed. |
| 5157 | * For larger arrays we record the new value - after validation | 5177 | * For larger arrays we record the new value - after validation |
| 5158 | * to be used by a reshape pass. | 5178 | * to be used by a reshape pass. |
| 5159 | */ | 5179 | */ |
| 5160 | raid5_conf_t *conf = mddev_to_conf(mddev); | 5180 | raid5_conf_t *conf = mddev->private; |
| 5181 | int new_chunk = mddev->new_chunk_sectors; | ||
| 5161 | 5182 | ||
| 5162 | if (new_layout >= 0 && !algorithm_valid_raid5(new_layout)) | 5183 | if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout)) |
| 5163 | return -EINVAL; | 5184 | return -EINVAL; |
| 5164 | if (new_chunk > 0) { | 5185 | if (new_chunk > 0) { |
| 5165 | if (new_chunk & (new_chunk-1)) | 5186 | if (!is_power_of_2(new_chunk)) |
| 5166 | /* not a power of 2 */ | ||
| 5167 | return -EINVAL; | 5187 | return -EINVAL; |
| 5168 | if (new_chunk < PAGE_SIZE) | 5188 | if (new_chunk < (PAGE_SIZE>>9)) |
| 5169 | return -EINVAL; | 5189 | return -EINVAL; |
| 5170 | if (mddev->array_sectors & ((new_chunk>>9)-1)) | 5190 | if (mddev->array_sectors & (new_chunk-1)) |
| 5171 | /* not factor of array size */ | 5191 | /* not factor of array size */ |
| 5172 | return -EINVAL; | 5192 | return -EINVAL; |
| 5173 | } | 5193 | } |
| @@ -5175,49 +5195,39 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) | |||
| 5175 | /* They look valid */ | 5195 | /* They look valid */ |
| 5176 | 5196 | ||
| 5177 | if (mddev->raid_disks == 2) { | 5197 | if (mddev->raid_disks == 2) { |
| 5178 | 5198 | /* can make the change immediately */ | |
| 5179 | if (new_layout >= 0) { | 5199 | if (mddev->new_layout >= 0) { |
| 5180 | conf->algorithm = new_layout; | 5200 | conf->algorithm = mddev->new_layout; |
| 5181 | mddev->layout = mddev->new_layout = new_layout; | 5201 | mddev->layout = mddev->new_layout; |
| 5182 | } | 5202 | } |
| 5183 | if (new_chunk > 0) { | 5203 | if (new_chunk > 0) { |
| 5184 | conf->chunk_size = new_chunk; | 5204 | conf->chunk_sectors = new_chunk; |
| 5185 | mddev->chunk_size = mddev->new_chunk = new_chunk; | 5205 | mddev->chunk_sectors = new_chunk; |
| 5186 | } | 5206 | } |
| 5187 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 5207 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
| 5188 | md_wakeup_thread(mddev->thread); | 5208 | md_wakeup_thread(mddev->thread); |
| 5189 | } else { | ||
| 5190 | if (new_layout >= 0) | ||
| 5191 | mddev->new_layout = new_layout; | ||
| 5192 | if (new_chunk > 0) | ||
| 5193 | mddev->new_chunk = new_chunk; | ||
| 5194 | } | 5209 | } |
| 5195 | return 0; | 5210 | return check_reshape(mddev); |
| 5196 | } | 5211 | } |
| 5197 | 5212 | ||
| 5198 | static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk) | 5213 | static int raid6_check_reshape(mddev_t *mddev) |
| 5199 | { | 5214 | { |
| 5200 | if (new_layout >= 0 && !algorithm_valid_raid6(new_layout)) | 5215 | int new_chunk = mddev->new_chunk_sectors; |
| 5216 | |||
| 5217 | if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout)) | ||
| 5201 | return -EINVAL; | 5218 | return -EINVAL; |
| 5202 | if (new_chunk > 0) { | 5219 | if (new_chunk > 0) { |
| 5203 | if (new_chunk & (new_chunk-1)) | 5220 | if (!is_power_of_2(new_chunk)) |
| 5204 | /* not a power of 2 */ | ||
| 5205 | return -EINVAL; | 5221 | return -EINVAL; |
| 5206 | if (new_chunk < PAGE_SIZE) | 5222 | if (new_chunk < (PAGE_SIZE >> 9)) |
| 5207 | return -EINVAL; | 5223 | return -EINVAL; |
| 5208 | if (mddev->array_sectors & ((new_chunk>>9)-1)) | 5224 | if (mddev->array_sectors & (new_chunk-1)) |
| 5209 | /* not factor of array size */ | 5225 | /* not factor of array size */ |
| 5210 | return -EINVAL; | 5226 | return -EINVAL; |
| 5211 | } | 5227 | } |
| 5212 | 5228 | ||
| 5213 | /* They look valid */ | 5229 | /* They look valid */ |
| 5214 | 5230 | return check_reshape(mddev); | |
| 5215 | if (new_layout >= 0) | ||
| 5216 | mddev->new_layout = new_layout; | ||
| 5217 | if (new_chunk > 0) | ||
| 5218 | mddev->new_chunk = new_chunk; | ||
| 5219 | |||
| 5220 | return 0; | ||
| 5221 | } | 5231 | } |
| 5222 | 5232 | ||
| 5223 | static void *raid5_takeover(mddev_t *mddev) | 5233 | static void *raid5_takeover(mddev_t *mddev) |
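[annotation] With the arguments gone, both personalities read mddev->new_layout and mddev->new_chunk_sectors directly, apply the same sanity checks, and then fall through to the common check_reshape(). A sketch of the shared chunk validation, assuming new_chunk > 0 and a 4 KiB page:

    /* Sketch: power of two, at least one page, and a factor of the
     * array size (all in 512-byte sectors). */
    static int new_chunk_valid(unsigned int new_chunk,
                               unsigned long long array_sectors)
    {
            if (new_chunk & (new_chunk - 1))
                    return 0;
            if (new_chunk < (4096 >> 9))
                    return 0;
            return (array_sectors & (new_chunk - 1)) == 0;
    }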
| @@ -5227,8 +5237,6 @@ static void *raid5_takeover(mddev_t *mddev) | |||
| 5227 | * raid1 - if there are two drives. We need to know the chunk size | 5237 | * raid1 - if there are two drives. We need to know the chunk size |
| 5228 | * raid4 - trivial - just use a raid4 layout. | 5238 | * raid4 - trivial - just use a raid4 layout. |
| 5229 | * raid6 - Providing it is a *_6 layout | 5239 | * raid6 - Providing it is a *_6 layout |
| 5230 | * | ||
| 5231 | * For now, just do raid1 | ||
| 5232 | */ | 5240 | */ |
| 5233 | 5241 | ||
| 5234 | if (mddev->level == 1) | 5242 | if (mddev->level == 1) |
| @@ -5310,12 +5318,11 @@ static struct mdk_personality raid6_personality = | |||
| 5310 | .sync_request = sync_request, | 5318 | .sync_request = sync_request, |
| 5311 | .resize = raid5_resize, | 5319 | .resize = raid5_resize, |
| 5312 | .size = raid5_size, | 5320 | .size = raid5_size, |
| 5313 | .check_reshape = raid5_check_reshape, | 5321 | .check_reshape = raid6_check_reshape, |
| 5314 | .start_reshape = raid5_start_reshape, | 5322 | .start_reshape = raid5_start_reshape, |
| 5315 | .finish_reshape = raid5_finish_reshape, | 5323 | .finish_reshape = raid5_finish_reshape, |
| 5316 | .quiesce = raid5_quiesce, | 5324 | .quiesce = raid5_quiesce, |
| 5317 | .takeover = raid6_takeover, | 5325 | .takeover = raid6_takeover, |
| 5318 | .reconfig = raid6_reconfig, | ||
| 5319 | }; | 5326 | }; |
| 5320 | static struct mdk_personality raid5_personality = | 5327 | static struct mdk_personality raid5_personality = |
| 5321 | { | 5328 | { |
| @@ -5338,7 +5345,6 @@ static struct mdk_personality raid5_personality = | |||
| 5338 | .finish_reshape = raid5_finish_reshape, | 5345 | .finish_reshape = raid5_finish_reshape, |
| 5339 | .quiesce = raid5_quiesce, | 5346 | .quiesce = raid5_quiesce, |
| 5340 | .takeover = raid5_takeover, | 5347 | .takeover = raid5_takeover, |
| 5341 | .reconfig = raid5_reconfig, | ||
| 5342 | }; | 5348 | }; |
| 5343 | 5349 | ||
| 5344 | static struct mdk_personality raid4_personality = | 5350 | static struct mdk_personality raid4_personality = |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 52ba99954dec..9459689c4ea0 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
| @@ -334,7 +334,8 @@ struct raid5_private_data { | |||
| 334 | struct hlist_head *stripe_hashtbl; | 334 | struct hlist_head *stripe_hashtbl; |
| 335 | mddev_t *mddev; | 335 | mddev_t *mddev; |
| 336 | struct disk_info *spare; | 336 | struct disk_info *spare; |
| 337 | int chunk_size, level, algorithm; | 337 | int chunk_sectors; |
| 338 | int level, algorithm; | ||
| 338 | int max_degraded; | 339 | int max_degraded; |
| 339 | int raid_disks; | 340 | int raid_disks; |
| 340 | int max_nr_stripes; | 341 | int max_nr_stripes; |
| @@ -350,7 +351,8 @@ struct raid5_private_data { | |||
| 350 | */ | 351 | */ |
| 351 | sector_t reshape_safe; | 352 | sector_t reshape_safe; |
| 352 | int previous_raid_disks; | 353 | int previous_raid_disks; |
| 353 | int prev_chunk, prev_algo; | 354 | int prev_chunk_sectors; |
| 355 | int prev_algo; | ||
| 354 | short generation; /* increments with every reshape */ | 356 | short generation; /* increments with every reshape */ |
| 355 | unsigned long reshape_checkpoint; /* Time we last updated | 357 | unsigned long reshape_checkpoint; /* Time we last updated |
| 356 | * metadata */ | 358 | * metadata */ |
| @@ -408,8 +410,6 @@ struct raid5_private_data { | |||
| 408 | 410 | ||
| 409 | typedef struct raid5_private_data raid5_conf_t; | 411 | typedef struct raid5_private_data raid5_conf_t; |
| 410 | 412 | ||
| 411 | #define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private) | ||
| 412 | |||
| 413 | /* | 413 | /* |
| 414 | * Our supported algorithms | 414 | * Our supported algorithms |
| 415 | */ | 415 | */ |
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 6ba830fa8538..ffa2efbbe382 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h | |||
| @@ -232,7 +232,7 @@ struct mdp_superblock_1 { | |||
| 232 | __le64 reshape_position; /* next address in array-space for reshape */ | 232 | __le64 reshape_position; /* next address in array-space for reshape */ |
| 233 | __le32 delta_disks; /* change in number of raid_disks */ | 233 | __le32 delta_disks; /* change in number of raid_disks */ |
| 234 | __le32 new_layout; /* new layout */ | 234 | __le32 new_layout; /* new layout */ |
| 235 | __le32 new_chunk; /* new chunk size (bytes) */ | 235 | __le32 new_chunk; /* new chunk size (512byte sectors) */ |
| 236 | __u8 pad1[128-124]; /* set to 0 when written */ | 236 | __u8 pad1[128-124]; /* set to 0 when written */ |
| 237 | 237 | ||
| 238 | /* constant this-device information - 64 bytes */ | 238 | /* constant this-device information - 64 bytes */ |
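[annotation] Only the comment changes here: the superblock has stored this field in 512-byte sectors all along (the loader shifted by 9 when converting to the old byte-based in-kernel value), so on-disk compatibility is untouched. A hedged sketch of that relationship, assuming the pre-series byte convention in the kernel:

    /* Sketch (illustrative, not the md loader itself): the disk held
     * sectors while the old in-kernel field held bytes. */
    static unsigned int to_disk_sectors(unsigned int chunk_bytes)   { return chunk_bytes >> 9; }
    static unsigned int from_disk_sectors(unsigned int disk_chunk)  { return disk_chunk << 9; }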
