author		Linus Torvalds <torvalds@linux-foundation.org>	2009-04-03 12:08:19 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-04-03 12:08:19 -0400
commit		223cdea4c4b5af5181b2da00ac85711d1e0c737c (patch)
tree		dfe7226c70ddabbf2e2e63924ba636345278e79c /drivers/md/md.c
parent		31e6e2dac575c9d21a6ec56ca52ae89086baa705 (diff)
parent		c8f517c444e4f9f55b5b5ca202b8404691a35805 (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (53 commits)
md/raid5 revise rules for when to update metadata during reshape
md/raid5: minor code cleanups in make_request.
md: remove CONFIG_MD_RAID_RESHAPE config option.
md/raid5: be more careful about write ordering when reshaping.
md: don't display meaningless values in sysfs files resync_start and sync_speed
md/raid5: allow layout and chunksize to be changed on active array.
md/raid5: reshape using largest of old and new chunk size
md/raid5: prepare for allowing reshape to change layout
md/raid5: prepare for allowing reshape to change chunksize.
md/raid5: clearly differentiate 'before' and 'after' stripes during reshape.
Documentation/md.txt update
md: allow number of drives in raid5 to be reduced
md/raid5: change reshape-progress measurement to cope with reshaping backwards.
md: add explicit method to signal the end of a reshape.
md/raid5: enhance raid5_size to work correctly with negative delta_disks
md/raid5: drop qd_idx from r6_state
md/raid6: move raid6 data processing to raid6_pq.ko
md: raid5 run(): Fix max_degraded for raid level 4.
md: 'array_size' sysfs attribute
md: centralize ->array_sectors modifications
...
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--	drivers/md/md.c	615
1 file changed, 467 insertions(+), 148 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a307f87eb90e..ed5727c089a9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -33,9 +33,9 @@
  */
 
 #include <linux/kthread.h>
-#include <linux/raid/md.h>
-#include <linux/raid/bitmap.h>
+#include <linux/blkdev.h>
 #include <linux/sysctl.h>
+#include <linux/seq_file.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
 #include <linux/poll.h>
 #include <linux/ctype.h>
@@ -45,11 +45,10 @@
 #include <linux/reboot.h>
 #include <linux/file.h>
 #include <linux/delay.h>
-
-#define MAJOR_NR MD_MAJOR
-
-/* 63 partitions with the alternate major number (mdp) */
-#define MdpMinorShift 6
+#include <linux/raid/md_p.h>
+#include <linux/raid/md_u.h>
+#include "md.h"
+#include "bitmap.h"
 
 #define DEBUG 0
 #define dprintk(x...) ((void)(DEBUG && printk(x)))
@@ -202,12 +201,68 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
 		)
 
 
-static int md_fail_request(struct request_queue *q, struct bio *bio)
+/* Rather than calling directly into the personality make_request function,
+ * IO requests come here first so that we can check if the device is
+ * being suspended pending a reconfiguration.
+ * We hold a refcount over the call to ->make_request. By the time that
+ * call has finished, the bio has been linked into some internal structure
+ * and so is visible to ->quiesce(), so we don't need the refcount any more.
+ */
+static int md_make_request(struct request_queue *q, struct bio *bio)
 {
-	bio_io_error(bio);
-	return 0;
+	mddev_t *mddev = q->queuedata;
+	int rv;
+	if (mddev == NULL || mddev->pers == NULL) {
+		bio_io_error(bio);
+		return 0;
+	}
+	rcu_read_lock();
+	if (mddev->suspended) {
+		DEFINE_WAIT(__wait);
+		for (;;) {
+			prepare_to_wait(&mddev->sb_wait, &__wait,
+					TASK_UNINTERRUPTIBLE);
+			if (!mddev->suspended)
+				break;
+			rcu_read_unlock();
+			schedule();
+			rcu_read_lock();
+		}
+		finish_wait(&mddev->sb_wait, &__wait);
+	}
+	atomic_inc(&mddev->active_io);
+	rcu_read_unlock();
+	rv = mddev->pers->make_request(q, bio);
+	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
+		wake_up(&mddev->sb_wait);
+
+	return rv;
+}
+
+static void mddev_suspend(mddev_t *mddev)
+{
+	BUG_ON(mddev->suspended);
+	mddev->suspended = 1;
+	synchronize_rcu();
+	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
+	mddev->pers->quiesce(mddev, 1);
+	md_unregister_thread(mddev->thread);
+	mddev->thread = NULL;
+	/* we now know that no code is executing in the personality module,
+	 * except possibly the tail end of a ->bi_end_io function, but that
+	 * is certain to complete before the module has a chance to get
+	 * unloaded
+	 */
+}
+
+static void mddev_resume(mddev_t *mddev)
+{
+	mddev->suspended = 0;
+	wake_up(&mddev->sb_wait);
+	mddev->pers->quiesce(mddev, 0);
 }
 
+
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
 	atomic_inc(&mddev->active);
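The new md_make_request()/mddev_suspend() pair above is the heart of this merge's quiesce support: normal IO takes a refcount unless the array is suspended, and suspend waits for all in-flight IO to drain before quiescing the personality. For illustration only (not part of the patch), here is a minimal userspace analogue of the same gate; it substitutes a pthread mutex and condvar for the kernel's RCU and mddev->sb_wait, and all names are invented for the sketch:

/* Userspace sketch of the md_make_request()/mddev_suspend() gate.
 * Compile with: cc -pthread gate.c
 */
#include <pthread.h>
#include <stdio.h>

struct sim_dev {
	pthread_mutex_t lock;
	pthread_cond_t  wait;	/* plays the role of mddev->sb_wait */
	int suspended;		/* mddev->suspended */
	int active_io;		/* mddev->active_io */
};

static void submit_io(struct sim_dev *dev)
{
	pthread_mutex_lock(&dev->lock);
	while (dev->suspended)		/* block new IO while suspended */
		pthread_cond_wait(&dev->wait, &dev->lock);
	dev->active_io++;		/* refcount held across ->make_request */
	pthread_mutex_unlock(&dev->lock);

	/* ... the personality's make_request would run here ... */

	pthread_mutex_lock(&dev->lock);
	if (--dev->active_io == 0 && dev->suspended)
		pthread_cond_broadcast(&dev->wait); /* last IO wakes suspender */
	pthread_mutex_unlock(&dev->lock);
}

static void suspend(struct sim_dev *dev)
{
	pthread_mutex_lock(&dev->lock);
	dev->suspended = 1;
	while (dev->active_io > 0)	/* drain in-flight IO */
		pthread_cond_wait(&dev->wait, &dev->lock);
	pthread_mutex_unlock(&dev->lock);
	/* safe to reconfigure here: nothing is inside the personality */
}

static void resume(struct sim_dev *dev)
{
	pthread_mutex_lock(&dev->lock);
	dev->suspended = 0;
	pthread_cond_broadcast(&dev->wait);
	pthread_mutex_unlock(&dev->lock);
}

int main(void)
{
	struct sim_dev dev = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.wait = PTHREAD_COND_INITIALIZER,
	};
	submit_io(&dev);
	suspend(&dev);
	resume(&dev);
	puts("quiesce round-trip ok");
	return 0;
}

The kernel version keeps the fast path lock-free with RCU; the mutex here trades that away for brevity.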
@@ -310,6 +365,7 @@ static mddev_t * mddev_find(dev_t unit)
 	init_timer(&new->safemode_timer);
 	atomic_set(&new->active, 1);
 	atomic_set(&new->openers, 0);
+	atomic_set(&new->active_io, 0);
 	spin_lock_init(&new->write_lock);
 	init_waitqueue_head(&new->sb_wait);
 	init_waitqueue_head(&new->recovery_wait);
@@ -326,6 +382,11 @@ static inline int mddev_lock(mddev_t * mddev)
 	return mutex_lock_interruptible(&mddev->reconfig_mutex);
 }
 
+static inline int mddev_is_locked(mddev_t *mddev)
+{
+	return mutex_is_locked(&mddev->reconfig_mutex);
+}
+
 static inline int mddev_trylock(mddev_t * mddev)
 {
 	return mutex_trylock(&mddev->reconfig_mutex);
@@ -409,7 +470,7 @@ static void free_disk_sb(mdk_rdev_t * rdev)
 		rdev->sb_loaded = 0;
 		rdev->sb_page = NULL;
 		rdev->sb_start = 0;
-		rdev->size = 0;
+		rdev->sectors = 0;
 	}
 }
 
@@ -775,9 +836,9 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
 		else
 			ret = 0;
 	}
-	rdev->size = calc_num_sectors(rdev, sb->chunk_size) / 2;
+	rdev->sectors = calc_num_sectors(rdev, sb->chunk_size);
 
-	if (rdev->size < sb->size && sb->level > 1)
+	if (rdev->sectors < sb->size * 2 && sb->level > 1)
 		/* "this cannot possibly happen" ... */
 		ret = -EINVAL;
 
@@ -812,7 +873,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->clevel[0] = 0;
 		mddev->layout = sb->layout;
 		mddev->raid_disks = sb->raid_disks;
-		mddev->size = sb->size;
+		mddev->dev_sectors = sb->size * 2;
 		mddev->events = ev1;
 		mddev->bitmap_offset = 0;
 		mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
@@ -926,7 +987,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	sb->ctime = mddev->ctime;
 	sb->level = mddev->level;
-	sb->size = mddev->size;
+	sb->size = mddev->dev_sectors / 2;
 	sb->raid_disks = mddev->raid_disks;
 	sb->md_minor = mddev->md_minor;
 	sb->not_persistent = 0;
@@ -1024,7 +1085,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 static unsigned long long
 super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 {
-	if (num_sectors && num_sectors < rdev->mddev->size * 2)
+	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
 		return 0; /* component must fit device */
 	if (rdev->mddev->bitmap_offset)
 		return 0; /* can't move bitmap */
@@ -1180,16 +1241,17 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 		ret = 0;
 	}
 	if (minor_version)
-		rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
+		rdev->sectors = (rdev->bdev->bd_inode->i_size >> 9) -
+			le64_to_cpu(sb->data_offset);
 	else
-		rdev->size = rdev->sb_start / 2;
-	if (rdev->size < le64_to_cpu(sb->data_size)/2)
+		rdev->sectors = rdev->sb_start;
+	if (rdev->sectors < le64_to_cpu(sb->data_size))
 		return -EINVAL;
-	rdev->size = le64_to_cpu(sb->data_size)/2;
+	rdev->sectors = le64_to_cpu(sb->data_size);
 	if (le32_to_cpu(sb->chunksize))
-		rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);
+		rdev->sectors &= ~((sector_t)le32_to_cpu(sb->chunksize) - 1);
 
-	if (le64_to_cpu(sb->size) > rdev->size*2)
+	if (le64_to_cpu(sb->size) > rdev->sectors)
 		return -EINVAL;
 	return ret;
 }
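super_1_load() above rounds the usable size down to a whole number of chunks with a single mask, which works because md chunk sizes (in sectors) are powers of two. A standalone sketch of that arithmetic, not part of the patch, with made-up device numbers:

/* Round a device size down to a multiple of the chunk size, as
 * super_1_load() does with "sectors &= ~(chunksize - 1)".  Only valid
 * because the chunk size is a power of two.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t sectors = 976773168;	/* e.g. a 500 GB disk, in 512-byte sectors */
	uint64_t chunk_sectors = 128;	/* a 64 KiB chunk = 128 sectors */

	uint64_t usable = sectors & ~(chunk_sectors - 1);

	printf("%llu -> %llu (dropped %llu trailing sectors)\n",
	       (unsigned long long)sectors,
	       (unsigned long long)usable,
	       (unsigned long long)(sectors - usable));	/* drops 48 here */
	return 0;
}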
@@ -1216,7 +1278,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->clevel[0] = 0;
 		mddev->layout = le32_to_cpu(sb->layout);
 		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
-		mddev->size = le64_to_cpu(sb->size)/2;
+		mddev->dev_sectors = le64_to_cpu(sb->size);
 		mddev->events = ev1;
 		mddev->bitmap_offset = 0;
 		mddev->default_bitmap_offset = 1024 >> 9;
@@ -1312,7 +1374,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
 
 	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
-	sb->size = cpu_to_le64(mddev->size<<1);
+	sb->size = cpu_to_le64(mddev->dev_sectors);
 
 	if (mddev->bitmap && mddev->bitmap_file == NULL) {
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
@@ -1320,10 +1382,15 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	}
 
 	if (rdev->raid_disk >= 0 &&
-	    !test_bit(In_sync, &rdev->flags) &&
-	    rdev->recovery_offset > 0) {
-		sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
-		sb->recovery_offset = cpu_to_le64(rdev->recovery_offset);
+	    !test_bit(In_sync, &rdev->flags)) {
+		if (mddev->curr_resync_completed > rdev->recovery_offset)
+			rdev->recovery_offset = mddev->curr_resync_completed;
+		if (rdev->recovery_offset > 0) {
+			sb->feature_map |=
+				cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
+			sb->recovery_offset =
+				cpu_to_le64(rdev->recovery_offset);
+		}
 	}
 
 	if (mddev->reshape_position != MaxSector) {
@@ -1365,7 +1432,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 {
 	struct mdp_superblock_1 *sb;
 	sector_t max_sectors;
-	if (num_sectors && num_sectors < rdev->mddev->size * 2)
+	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
 		return 0; /* component must fit device */
 	if (rdev->sb_start < rdev->data_offset) {
 		/* minor versions 1 and 2; superblock before data */
@@ -1381,7 +1448,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 		sector_t sb_start;
 		sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2;
 		sb_start &= ~(sector_t)(4*2 - 1);
-		max_sectors = rdev->size * 2 + sb_start - rdev->sb_start;
+		max_sectors = rdev->sectors + sb_start - rdev->sb_start;
 		if (!num_sectors || num_sectors > max_sectors)
 			num_sectors = max_sectors;
 		rdev->sb_start = sb_start;
@@ -1433,6 +1500,38 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
 
 static LIST_HEAD(pending_raid_disks);
 
+static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev)
+{
+	struct mdk_personality *pers = mddev->pers;
+	struct gendisk *disk = mddev->gendisk;
+	struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
+	struct blk_integrity *bi_mddev = blk_get_integrity(disk);
+
+	/* Data integrity passthrough not supported on RAID 4, 5 and 6 */
+	if (pers && pers->level >= 4 && pers->level <= 6)
+		return;
+
+	/* If rdev is integrity capable, register profile for mddev */
+	if (!bi_mddev && bi_rdev) {
+		if (blk_integrity_register(disk, bi_rdev))
+			printk(KERN_ERR "%s: %s Could not register integrity!\n",
+			       __func__, disk->disk_name);
+		else
+			printk(KERN_NOTICE "Enabling data integrity on %s\n",
+			       disk->disk_name);
+		return;
+	}
+
+	/* Check that mddev and rdev have matching profiles */
+	if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) {
+		printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__,
+		       disk->disk_name, rdev->bdev->bd_disk->disk_name);
+		printk(KERN_NOTICE "Disabling data integrity on %s\n",
+		       disk->disk_name);
+		blk_integrity_unregister(disk);
+	}
+}
+
 static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 {
 	char b[BDEVNAME_SIZE];
@@ -1449,8 +1548,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 	if (find_rdev(mddev, rdev->bdev->bd_dev))
 		return -EEXIST;
 
-	/* make sure rdev->size exceeds mddev->size */
-	if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
+	/* make sure rdev->sectors exceeds mddev->dev_sectors */
+	if (rdev->sectors && (mddev->dev_sectors == 0 ||
+			rdev->sectors < mddev->dev_sectors)) {
 		if (mddev->pers) {
 			/* Cannot change size, so fail
 			 * If mddev->level <= 0, then we don't care
@@ -1459,7 +1559,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 			if (mddev->level > 0)
 				return -ENOSPC;
 		} else
-			mddev->size = rdev->size;
+			mddev->dev_sectors = rdev->sectors;
 	}
 
 	/* Verify rdev->desc_nr is unique.
@@ -1503,6 +1603,8 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 
 	/* May as well allow recovery to be retried once */
 	mddev->recovery_disabled = 0;
+
+	md_integrity_check(rdev, mddev);
 	return 0;
 
  fail:
@@ -1713,8 +1815,8 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
 static void print_rdev(mdk_rdev_t *rdev, int major_version)
 {
 	char b[BDEVNAME_SIZE];
-	printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
-		bdevname(rdev->bdev,b), (unsigned long long)rdev->size,
+	printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
+		bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
 		test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
 		rdev->desc_nr);
 	if (rdev->sb_loaded) {
@@ -2153,7 +2255,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		return -EINVAL;
 	if (rdev->mddev->pers && rdev->raid_disk >= 0)
 		return -EBUSY;
-	if (rdev->size && rdev->mddev->external)
+	if (rdev->sectors && rdev->mddev->external)
 		/* Must set offset before size, so overlap checks
 		 * can be sane */
 		return -EBUSY;
@@ -2167,7 +2269,7 @@ __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
 static ssize_t
 rdev_size_show(mdk_rdev_t *rdev, char *page)
 {
-	return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
+	return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
 }
 
 static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
@@ -2180,34 +2282,52 @@ static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
 	return 1;
 }
 
+static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
+{
+	unsigned long long blocks;
+	sector_t new;
+
+	if (strict_strtoull(buf, 10, &blocks) < 0)
+		return -EINVAL;
+
+	if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
+		return -EINVAL; /* sector conversion overflow */
+
+	new = blocks * 2;
+	if (new != blocks * 2)
+		return -EINVAL; /* unsigned long long to sector_t overflow */
+
+	*sectors = new;
+	return 0;
+}
+
 static ssize_t
 rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
-	unsigned long long size;
-	unsigned long long oldsize = rdev->size;
 	mddev_t *my_mddev = rdev->mddev;
+	sector_t oldsectors = rdev->sectors;
+	sector_t sectors;
 
-	if (strict_strtoull(buf, 10, &size) < 0)
+	if (strict_blocks_to_sectors(buf, &sectors) < 0)
 		return -EINVAL;
 	if (my_mddev->pers && rdev->raid_disk >= 0) {
 		if (my_mddev->persistent) {
-			size = super_types[my_mddev->major_version].
-				rdev_size_change(rdev, size * 2);
-			if (!size)
+			sectors = super_types[my_mddev->major_version].
+				rdev_size_change(rdev, sectors);
+			if (!sectors)
 				return -EBUSY;
-		} else if (!size) {
-			size = (rdev->bdev->bd_inode->i_size >> 10);
-			size -= rdev->data_offset/2;
-		}
+		} else if (!sectors)
+			sectors = (rdev->bdev->bd_inode->i_size >> 9) -
+				rdev->data_offset;
 	}
-	if (size < my_mddev->size)
+	if (sectors < my_mddev->dev_sectors)
 		return -EINVAL; /* component must fit device */
 
-	rdev->size = size;
-	if (size > oldsize && my_mddev->external) {
+	rdev->sectors = sectors;
+	if (sectors > oldsectors && my_mddev->external) {
 		/* need to check that all other rdevs with the same ->bdev
 		 * do not overlap. We need to unlock the mddev to avoid
-		 * a deadlock. We have already changed rdev->size, and if
+		 * a deadlock. We have already changed rdev->sectors, and if
 		 * we have to change it back, we will have the lock again.
 		 */
 		mddev_t *mddev;
@@ -2223,9 +2343,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 			if (test_bit(AllReserved, &rdev2->flags) ||
 			    (rdev->bdev == rdev2->bdev &&
 			     rdev != rdev2 &&
-			     overlaps(rdev->data_offset, rdev->size * 2,
+			     overlaps(rdev->data_offset, rdev->sectors,
 				      rdev2->data_offset,
-				      rdev2->size * 2))) {
+				      rdev2->sectors))) {
 				overlap = 1;
 				break;
 			}
@@ -2239,11 +2359,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		if (overlap) {
 			/* Someone else could have slipped in a size
 			 * change here, but doing so is just silly.
-			 * We put oldsize back because we *know* it is
+			 * We put oldsectors back because we *know* it is
 			 * safe, and trust userspace not to race with
 			 * itself
 			 */
-			rdev->size = oldsize;
+			rdev->sectors = oldsectors;
 			return -EBUSY;
 		}
 	}
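strict_blocks_to_sectors() guards two separate overflows when converting a count of 1 KiB blocks into 512-byte sectors: the doubling must not overflow unsigned long long, and the doubled value must survive assignment to sector_t, which can be only 32 bits wide. A userspace re-implementation of the same checks, not part of the patch; the 32-bit sector_t is a deliberate assumption so the second check can fire:

/* Userspace copy of the strict_blocks_to_sectors() logic above. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t sector_t;	/* assume a 32-bit sector_t for the demo */

static int blocks_to_sectors(unsigned long long blocks, sector_t *sectors)
{
	sector_t new;

	if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
		return -EINVAL;	/* blocks * 2 would overflow u64 */

	new = blocks * 2;
	if (new != blocks * 2)
		return -EINVAL;	/* does not fit in sector_t */

	*sectors = new;
	return 0;
}

int main(void)
{
	sector_t s;
	/* 1 TiB as KiB blocks: 2^30 blocks -> 2^31 sectors, fits in u32 */
	printf("2^30 blocks: %d\n", blocks_to_sectors(1ULL << 30, &s));
	/* 4 TiB as KiB blocks: 2^32 blocks -> 2^33 sectors, too big */
	printf("2^32 blocks: %d\n", blocks_to_sectors(1ULL << 32, &s));
	return 0;
}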
@@ -2547,18 +2667,101 @@ level_show(mddev_t *mddev, char *page)
 static ssize_t
 level_store(mddev_t *mddev, const char *buf, size_t len)
 {
+	char level[16];
 	ssize_t rv = len;
-	if (mddev->pers)
+	struct mdk_personality *pers;
+	void *priv;
+
+	if (mddev->pers == NULL) {
+		if (len == 0)
+			return 0;
+		if (len >= sizeof(mddev->clevel))
+			return -ENOSPC;
+		strncpy(mddev->clevel, buf, len);
+		if (mddev->clevel[len-1] == '\n')
+			len--;
+		mddev->clevel[len] = 0;
+		mddev->level = LEVEL_NONE;
+		return rv;
+	}
+
+	/* request to change the personality. Need to ensure:
+	 *  - array is not engaged in resync/recovery/reshape
+	 *  - old personality can be suspended
+	 *  - new personality will access other array.
+	 */
+
+	if (mddev->sync_thread || mddev->reshape_position != MaxSector)
 		return -EBUSY;
-	if (len == 0)
-		return 0;
-	if (len >= sizeof(mddev->clevel))
-		return -ENOSPC;
-	strncpy(mddev->clevel, buf, len);
-	if (mddev->clevel[len-1] == '\n')
+
+	if (!mddev->pers->quiesce) {
+		printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
+		       mdname(mddev), mddev->pers->name);
+		return -EINVAL;
+	}
+
+	/* Now find the new personality */
+	if (len == 0 || len >= sizeof(level))
+		return -EINVAL;
+	strncpy(level, buf, len);
+	if (level[len-1] == '\n')
 		len--;
-	mddev->clevel[len] = 0;
-	mddev->level = LEVEL_NONE;
+	level[len] = 0;
+
+	request_module("md-%s", level);
+	spin_lock(&pers_lock);
+	pers = find_pers(LEVEL_NONE, level);
+	if (!pers || !try_module_get(pers->owner)) {
+		spin_unlock(&pers_lock);
+		printk(KERN_WARNING "md: personality %s not loaded\n", level);
+		return -EINVAL;
+	}
+	spin_unlock(&pers_lock);
+
+	if (pers == mddev->pers) {
+		/* Nothing to do! */
+		module_put(pers->owner);
+		return rv;
+	}
+	if (!pers->takeover) {
+		module_put(pers->owner);
+		printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
+		       mdname(mddev), level);
+		return -EINVAL;
+	}
+
+	/* ->takeover must set new_* and/or delta_disks
+	 * if it succeeds, and may set them when it fails.
+	 */
+	priv = pers->takeover(mddev);
+	if (IS_ERR(priv)) {
+		mddev->new_level = mddev->level;
+		mddev->new_layout = mddev->layout;
+		mddev->new_chunk = mddev->chunk_size;
+		mddev->raid_disks -= mddev->delta_disks;
+		mddev->delta_disks = 0;
+		module_put(pers->owner);
+		printk(KERN_WARNING "md: %s: %s would not accept array\n",
+		       mdname(mddev), level);
+		return PTR_ERR(priv);
+	}
+
+	/* Looks like we have a winner */
+	mddev_suspend(mddev);
+	mddev->pers->stop(mddev);
+	module_put(mddev->pers->owner);
+	mddev->pers = pers;
+	mddev->private = priv;
+	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+	mddev->level = mddev->new_level;
+	mddev->layout = mddev->new_layout;
+	mddev->chunk_size = mddev->new_chunk;
+	mddev->delta_disks = 0;
+	pers->run(mddev);
+	mddev_resume(mddev);
+	set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	md_wakeup_thread(mddev->thread);
 	return rv;
 }
 
@@ -2586,12 +2789,18 @@ layout_store(mddev_t *mddev, const char *buf, size_t len)
 	if (!*buf || (*e && *e != '\n'))
 		return -EINVAL;
 
-	if (mddev->pers)
-		return -EBUSY;
-	if (mddev->reshape_position != MaxSector)
+	if (mddev->pers) {
+		int err;
+		if (mddev->pers->reconfig == NULL)
+			return -EBUSY;
+		err = mddev->pers->reconfig(mddev, n, -1);
+		if (err)
+			return err;
+	} else {
 		mddev->new_layout = n;
-	else
-		mddev->layout = n;
+		if (mddev->reshape_position == MaxSector)
+			mddev->layout = n;
+	}
 	return len;
 }
 static struct md_sysfs_entry md_layout =
@@ -2648,19 +2857,24 @@ chunk_size_show(mddev_t *mddev, char *page)
 static ssize_t
 chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
 {
-	/* can only set chunk_size if array is not yet active */
 	char *e;
 	unsigned long n = simple_strtoul(buf, &e, 10);
 
 	if (!*buf || (*e && *e != '\n'))
 		return -EINVAL;
 
-	if (mddev->pers)
-		return -EBUSY;
-	else if (mddev->reshape_position != MaxSector)
+	if (mddev->pers) {
+		int err;
+		if (mddev->pers->reconfig == NULL)
+			return -EBUSY;
+		err = mddev->pers->reconfig(mddev, -1, n);
+		if (err)
+			return err;
+	} else {
 		mddev->new_chunk = n;
-	else
-		mddev->chunk_size = n;
+		if (mddev->reshape_position == MaxSector)
+			mddev->chunk_size = n;
+	}
 	return len;
 }
 static struct md_sysfs_entry md_chunk_size =
@@ -2669,6 +2883,8 @@ __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
 static ssize_t
 resync_start_show(mddev_t *mddev, char *page)
 {
+	if (mddev->recovery_cp == MaxSector)
+		return sprintf(page, "none\n");
 	return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
 }
 
@@ -2766,7 +2982,7 @@ array_state_show(mddev_t *mddev, char *page)
 	else {
 		if (list_empty(&mddev->disks) &&
 		    mddev->raid_disks == 0 &&
-		    mddev->size == 0)
+		    mddev->dev_sectors == 0)
 			st = clear;
 		else
 			st = inactive;
@@ -2973,7 +3189,8 @@ __ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
 static ssize_t
 size_show(mddev_t *mddev, char *page)
 {
-	return sprintf(page, "%llu\n", (unsigned long long)mddev->size);
+	return sprintf(page, "%llu\n",
+		(unsigned long long)mddev->dev_sectors / 2);
 }
 
 static int update_size(mddev_t *mddev, sector_t num_sectors);
@@ -2985,20 +3202,18 @@ size_store(mddev_t *mddev, const char *buf, size_t len)
 	 * not increase it (except from 0).
 	 * If array is active, we can try an on-line resize
 	 */
-	char *e;
-	int err = 0;
-	unsigned long long size = simple_strtoull(buf, &e, 10);
-	if (!*buf || *buf == '\n' ||
-	    (*e && *e != '\n'))
-		return -EINVAL;
+	sector_t sectors;
+	int err = strict_blocks_to_sectors(buf, &sectors);
 
+	if (err < 0)
+		return err;
 	if (mddev->pers) {
-		err = update_size(mddev, size * 2);
+		err = update_size(mddev, sectors);
 		md_update_sb(mddev, 1);
 	} else {
-		if (mddev->size == 0 ||
-		    mddev->size > size)
-			mddev->size = size;
+		if (mddev->dev_sectors == 0 ||
+		    mddev->dev_sectors > sectors)
+			mddev->dev_sectors = sectors;
 		else
 			err = -ENOSPC;
 	}
@@ -3251,6 +3466,8 @@ static ssize_t
 sync_speed_show(mddev_t *mddev, char *page)
 {
 	unsigned long resync, dt, db;
+	if (mddev->curr_resync == 0)
+		return sprintf(page, "none\n");
 	resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
 	dt = (jiffies - mddev->resync_mark) / HZ;
 	if (!dt) dt++;
@@ -3263,15 +3480,15 @@ static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
 static ssize_t
 sync_completed_show(mddev_t *mddev, char *page)
 {
-	unsigned long max_blocks, resync;
+	unsigned long max_sectors, resync;
 
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
-		max_blocks = mddev->resync_max_sectors;
+		max_sectors = mddev->resync_max_sectors;
 	else
-		max_blocks = mddev->size << 1;
+		max_sectors = mddev->dev_sectors;
 
 	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
-	return sprintf(page, "%lu / %lu\n", resync, max_blocks);
+	return sprintf(page, "%lu / %lu\n", resync, max_sectors);
 }
 
 static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
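The value sync_speed_show() above reports (now "none" when no resync is running) is derived from two resync marks: sectors completed since the previous mark, divided by the elapsed seconds, halved to get KiB/s since a sector is 512 bytes. The same arithmetic on invented sample values, not part of the patch:

/* The sync_speed_show() arithmetic on made-up numbers. */
#include <stdio.h>

int main(void)
{
	unsigned long curr_mark_cnt = 5000000;	 /* sectors at current mark */
	unsigned long resync_mark_cnt = 4400000; /* sectors at previous mark */
	unsigned long recovery_active = 2048;	 /* sectors still in flight */
	unsigned long dt = 3;			 /* seconds between marks */

	unsigned long resync = curr_mark_cnt - recovery_active;
	unsigned long db = resync - resync_mark_cnt;

	printf("%lu KiB/sec\n", db / dt / 2);	/* 597952/3/2 = 99658 */
	return 0;
}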
@@ -3431,6 +3648,57 @@ static struct md_sysfs_entry md_reshape_position =
 __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
        reshape_position_store);
 
+static ssize_t
+array_size_show(mddev_t *mddev, char *page)
+{
+	if (mddev->external_size)
+		return sprintf(page, "%llu\n",
+			       (unsigned long long)mddev->array_sectors/2);
+	else
+		return sprintf(page, "default\n");
+}
+
+static ssize_t
+array_size_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	sector_t sectors;
+
+	if (strncmp(buf, "default", 7) == 0) {
+		if (mddev->pers)
+			sectors = mddev->pers->size(mddev, 0, 0);
+		else
+			sectors = mddev->array_sectors;
+
+		mddev->external_size = 0;
+	} else {
+		if (strict_blocks_to_sectors(buf, &sectors) < 0)
+			return -EINVAL;
+		if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
+			return -EINVAL;
+
+		mddev->external_size = 1;
+	}
+
+	mddev->array_sectors = sectors;
+	set_capacity(mddev->gendisk, mddev->array_sectors);
+	if (mddev->pers) {
+		struct block_device *bdev = bdget_disk(mddev->gendisk, 0);
+
+		if (bdev) {
+			mutex_lock(&bdev->bd_inode->i_mutex);
+			i_size_write(bdev->bd_inode,
+				     (loff_t)mddev->array_sectors << 9);
+			mutex_unlock(&bdev->bd_inode->i_mutex);
+			bdput(bdev);
+		}
+	}
+
+	return len;
+}
+
+static struct md_sysfs_entry md_array_size =
+__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
+       array_size_store);
 
 static struct attribute *md_default_attrs[] = {
 	&md_level.attr,
@@ -3444,6 +3712,7 @@ static struct attribute *md_default_attrs[] = {
 	&md_safe_delay.attr,
 	&md_array_state.attr,
 	&md_reshape_position.attr,
+	&md_array_size.attr,
 	NULL,
 };
 
@@ -3602,10 +3871,12 @@ static int md_alloc(dev_t dev, char *name)
 		mddev_put(mddev);
 		return -ENOMEM;
 	}
+	mddev->queue->queuedata = mddev;
+
 	/* Can be unlocked because the queue is new: no concurrency */
 	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
 
-	blk_queue_make_request(mddev->queue, md_fail_request);
+	blk_queue_make_request(mddev->queue, md_make_request);
 
 	disk = alloc_disk(1 << shift);
 	if (!disk) {
@@ -3731,13 +4002,13 @@ static int do_md_run(mddev_t * mddev)
 		list_for_each_entry(rdev, &mddev->disks, same_set) {
 			if (test_bit(Faulty, &rdev->flags))
 				continue;
-			if (rdev->size < chunk_size / 1024) {
+			if (rdev->sectors < chunk_size / 512) {
 				printk(KERN_WARNING
 					"md: Dev %s smaller than chunk_size:"
-					" %lluk < %dk\n",
+					" %llu < %d\n",
 					bdevname(rdev->bdev,b),
-					(unsigned long long)rdev->size,
-					chunk_size / 1024);
+					(unsigned long long)rdev->sectors,
+					chunk_size / 512);
 				return -EINVAL;
 			}
 		}
@@ -3761,11 +4032,11 @@ static int do_md_run(mddev_t * mddev)
 
 		/* perform some consistency tests on the device.
 		 * We don't want the data to overlap the metadata,
-		 * Internal Bitmap issues has handled elsewhere.
+		 * Internal Bitmap issues have been handled elsewhere.
 		 */
 		if (rdev->data_offset < rdev->sb_start) {
-			if (mddev->size &&
-			    rdev->data_offset + mddev->size*2
+			if (mddev->dev_sectors &&
+			    rdev->data_offset + mddev->dev_sectors
 			    > rdev->sb_start) {
 				printk("md: %s: data overlaps metadata\n",
 				       mdname(mddev));
@@ -3801,9 +4072,16 @@ static int do_md_run(mddev_t * mddev)
 	}
 	mddev->pers = pers;
 	spin_unlock(&pers_lock);
-	mddev->level = pers->level;
+	if (mddev->level != pers->level) {
+		mddev->level = pers->level;
+		mddev->new_level = pers->level;
+	}
 	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
+	if (pers->level >= 4 && pers->level <= 6)
+		/* Cannot support integrity (yet) */
+		blk_integrity_unregister(mddev->gendisk);
+
 	if (mddev->reshape_position != MaxSector &&
 	    pers->start_reshape == NULL) {
 		/* This personality cannot handle reshaping... */
@@ -3843,7 +4121,9 @@ static int do_md_run(mddev_t * mddev)
 	}
 
 	mddev->recovery = 0;
-	mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
+	/* may be over-ridden by personality */
+	mddev->resync_max_sectors = mddev->dev_sectors;
+
 	mddev->barriers_work = 1;
 	mddev->ok_start_degraded = start_dirty_degraded;
 
@@ -3853,7 +4133,17 @@ static int do_md_run(mddev_t * mddev)
 	err = mddev->pers->run(mddev);
 	if (err)
 		printk(KERN_ERR "md: pers->run() failed ...\n");
-	else if (mddev->pers->sync_request) {
+	else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
+		WARN_ONCE(!mddev->external_size, "%s: default size too small,"
+			  " but 'external_size' not in effect?\n", __func__);
+		printk(KERN_ERR
+		       "md: invalid array_size %llu > default size %llu\n",
+		       (unsigned long long)mddev->array_sectors / 2,
+		       (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
+		err = -EINVAL;
+		mddev->pers->stop(mddev);
+	}
+	if (err == 0 && mddev->pers->sync_request) {
 		err = bitmap_create(mddev);
 		if (err) {
 			printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
@@ -3899,16 +4189,6 @@ static int do_md_run(mddev_t * mddev)
 
 	set_capacity(disk, mddev->array_sectors);
 
-	/* If we call blk_queue_make_request here, it will
-	 * re-initialise max_sectors etc which may have been
-	 * refined inside -> run. So just set the bits we need to set.
-	 * Most initialisation happended when we called
-	 * blk_queue_make_request(..., md_fail_request)
-	 * earlier.
-	 */
-	mddev->queue->queuedata = mddev;
-	mddev->queue->make_request_fn = mddev->pers->make_request;
-
 	/* If there is a partially-recovered drive we need to
 	 * start recovery here. If we leave it to md_check_recovery,
 	 * it will remove the drives and not do the right thing
@@ -4038,7 +4318,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 			md_super_wait(mddev);
 			if (mddev->ro)
 				set_disk_ro(disk, 0);
-			blk_queue_make_request(mddev->queue, md_fail_request);
+
 			mddev->pers->stop(mddev);
 			mddev->queue->merge_bvec_fn = NULL;
 			mddev->queue->unplug_fn = NULL;
@@ -4095,7 +4375,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 		export_array(mddev);
 
 		mddev->array_sectors = 0;
-		mddev->size = 0;
+		mddev->external_size = 0;
+		mddev->dev_sectors = 0;
 		mddev->raid_disks = 0;
 		mddev->recovery_cp = 0;
 		mddev->resync_min = 0;
@@ -4135,6 +4416,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 		printk(KERN_INFO "md: %s switched to read-only mode.\n",
 			mdname(mddev));
 		err = 0;
+		blk_integrity_unregister(disk);
 		md_new_event(mddev);
 		sysfs_notify_dirent(mddev->sysfs_state);
 out:
@@ -4300,8 +4582,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
 	info.patch_version = MD_PATCHLEVEL_VERSION;
 	info.ctime = mddev->ctime;
 	info.level = mddev->level;
-	info.size = mddev->size;
-	if (info.size != mddev->size) /* overflow */
+	info.size = mddev->dev_sectors / 2;
+	if (info.size != mddev->dev_sectors / 2) /* overflow */
 		info.size = -1;
 	info.nr_disks = nr;
 	info.raid_disks = mddev->raid_disks;
@@ -4480,6 +4762,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 			clear_bit(In_sync, &rdev->flags); /* just to be sure */
 		if (info->state & (1<<MD_DISK_WRITEMOSTLY))
 			set_bit(WriteMostly, &rdev->flags);
+		else
+			clear_bit(WriteMostly, &rdev->flags);
 
 		rdev->raid_disk = -1;
 		err = bind_rdev_to_array(rdev, mddev);
@@ -4543,7 +4827,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 			rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
 		} else
 			rdev->sb_start = calc_dev_sboffset(rdev->bdev);
-		rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2;
+		rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
 
 		err = bind_rdev_to_array(rdev, mddev);
 		if (err) {
@@ -4613,7 +4897,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
 	else
 		rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
 
-	rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2;
+	rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
 
 	if (test_bit(Faulty, &rdev->flags)) {
 		printk(KERN_WARNING
@@ -4749,7 +5033,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 
 	mddev->level = info->level;
 	mddev->clevel[0] = 0;
-	mddev->size = info->size;
+	mddev->dev_sectors = 2 * (sector_t)info->size;
 	mddev->raid_disks = info->raid_disks;
 	/* don't set md_minor, it is determined by which /dev/md* was
 	 * openned
@@ -4788,6 +5072,17 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 	return 0;
 }
 
+void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors)
+{
+	WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
+
+	if (mddev->external_size)
+		return;
+
+	mddev->array_sectors = array_sectors;
+}
+EXPORT_SYMBOL(md_set_array_sectors);
+
 static int update_size(mddev_t *mddev, sector_t num_sectors)
 {
 	mdk_rdev_t *rdev;
@@ -4814,8 +5109,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
 	 */
 		return -EBUSY;
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
-		sector_t avail;
-		avail = rdev->size * 2;
+		sector_t avail = rdev->sectors;
 
 		if (fit && (num_sectors == 0 || num_sectors > avail))
 			num_sectors = avail;
@@ -4887,12 +5181,18 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
4887 | ) | 5181 | ) |
4888 | return -EINVAL; | 5182 | return -EINVAL; |
4889 | /* Check there is only one change */ | 5183 | /* Check there is only one change */ |
4890 | if (info->size >= 0 && mddev->size != info->size) cnt++; | 5184 | if (info->size >= 0 && mddev->dev_sectors / 2 != info->size) |
4891 | if (mddev->raid_disks != info->raid_disks) cnt++; | 5185 | cnt++; |
4892 | if (mddev->layout != info->layout) cnt++; | 5186 | if (mddev->raid_disks != info->raid_disks) |
4893 | if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++; | 5187 | cnt++; |
4894 | if (cnt == 0) return 0; | 5188 | if (mddev->layout != info->layout) |
4895 | if (cnt > 1) return -EINVAL; | 5189 | cnt++; |
5190 | if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) | ||
5191 | cnt++; | ||
5192 | if (cnt == 0) | ||
5193 | return 0; | ||
5194 | if (cnt > 1) | ||
5195 | return -EINVAL; | ||
4896 | 5196 | ||
4897 | if (mddev->layout != info->layout) { | 5197 | if (mddev->layout != info->layout) { |
4898 | /* Change layout | 5198 | /* Change layout |
@@ -4904,7 +5204,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
4904 | else | 5204 | else |
4905 | return mddev->pers->reconfig(mddev, info->layout, -1); | 5205 | return mddev->pers->reconfig(mddev, info->layout, -1); |
4906 | } | 5206 | } |
4907 | if (info->size >= 0 && mddev->size != info->size) | 5207 | if (info->size >= 0 && mddev->dev_sectors / 2 != info->size) |
4908 | rv = update_size(mddev, (sector_t)info->size * 2); | 5208 | rv = update_size(mddev, (sector_t)info->size * 2); |
4909 | 5209 | ||
4910 | if (mddev->raid_disks != info->raid_disks) | 5210 | if (mddev->raid_disks != info->raid_disks) |
@@ -5331,6 +5631,8 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
5331 | 5631 | ||
5332 | void md_unregister_thread(mdk_thread_t *thread) | 5632 | void md_unregister_thread(mdk_thread_t *thread) |
5333 | { | 5633 | { |
5634 | if (!thread) | ||
5635 | return; | ||
5334 | dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); | 5636 | dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); |
5335 | 5637 | ||
5336 | kthread_stop(thread->tsk); | 5638 | kthread_stop(thread->tsk); |
@@ -5404,7 +5706,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
5404 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 5706 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) |
5405 | max_blocks = mddev->resync_max_sectors >> 1; | 5707 | max_blocks = mddev->resync_max_sectors >> 1; |
5406 | else | 5708 | else |
5407 | max_blocks = mddev->size; | 5709 | max_blocks = mddev->dev_sectors / 2; |
5408 | 5710 | ||
5409 | /* | 5711 | /* |
5410 | * Should not happen. | 5712 | * Should not happen. |
@@ -5537,7 +5839,7 @@ struct mdstat_info { | |||
5537 | static int md_seq_show(struct seq_file *seq, void *v) | 5839 | static int md_seq_show(struct seq_file *seq, void *v) |
5538 | { | 5840 | { |
5539 | mddev_t *mddev = v; | 5841 | mddev_t *mddev = v; |
5540 | sector_t size; | 5842 | sector_t sectors; |
5541 | mdk_rdev_t *rdev; | 5843 | mdk_rdev_t *rdev; |
5542 | struct mdstat_info *mi = seq->private; | 5844 | struct mdstat_info *mi = seq->private; |
5543 | struct bitmap *bitmap; | 5845 | struct bitmap *bitmap; |
@@ -5573,7 +5875,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5573 | seq_printf(seq, " %s", mddev->pers->name); | 5875 | seq_printf(seq, " %s", mddev->pers->name); |
5574 | } | 5876 | } |
5575 | 5877 | ||
5576 | size = 0; | 5878 | sectors = 0; |
5577 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 5879 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
5578 | char b[BDEVNAME_SIZE]; | 5880 | char b[BDEVNAME_SIZE]; |
5579 | seq_printf(seq, " %s[%d]", | 5881 | seq_printf(seq, " %s[%d]", |
@@ -5585,7 +5887,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5585 | continue; | 5887 | continue; |
5586 | } else if (rdev->raid_disk < 0) | 5888 | } else if (rdev->raid_disk < 0) |
5587 | seq_printf(seq, "(S)"); /* spare */ | 5889 | seq_printf(seq, "(S)"); /* spare */ |
5588 | size += rdev->size; | 5890 | sectors += rdev->sectors; |
5589 | } | 5891 | } |
5590 | 5892 | ||
5591 | if (!list_empty(&mddev->disks)) { | 5893 | if (!list_empty(&mddev->disks)) { |
@@ -5595,7 +5897,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5595 | mddev->array_sectors / 2); | 5897 | mddev->array_sectors / 2); |
5596 | else | 5898 | else |
5597 | seq_printf(seq, "\n %llu blocks", | 5899 | seq_printf(seq, "\n %llu blocks", |
5598 | (unsigned long long)size); | 5900 | (unsigned long long)sectors / 2); |
5599 | } | 5901 | } |
5600 | if (mddev->persistent) { | 5902 | if (mddev->persistent) { |
5601 | if (mddev->major_version != 0 || | 5903 | if (mddev->major_version != 0 || |
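Keeping the running total in sectors and converting only at print time avoids mixing units mid-computation: for example, two members of 8388608 sectors each sum to sectors = 16777216, which prints as 8388608 blocks (1K units).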
@@ -5722,19 +6024,19 @@ int unregister_md_personality(struct mdk_personality *p) | |||
5722 | return 0; | 6024 | return 0; |
5723 | } | 6025 | } |
5724 | 6026 | ||
5725 | static int is_mddev_idle(mddev_t *mddev) | 6027 | static int is_mddev_idle(mddev_t *mddev, int init) |
5726 | { | 6028 | { |
5727 | mdk_rdev_t * rdev; | 6029 | mdk_rdev_t * rdev; |
5728 | int idle; | 6030 | int idle; |
5729 | long curr_events; | 6031 | int curr_events; |
5730 | 6032 | ||
5731 | idle = 1; | 6033 | idle = 1; |
5732 | rcu_read_lock(); | 6034 | rcu_read_lock(); |
5733 | rdev_for_each_rcu(rdev, mddev) { | 6035 | rdev_for_each_rcu(rdev, mddev) { |
5734 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; | 6036 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; |
5735 | curr_events = part_stat_read(&disk->part0, sectors[0]) + | 6037 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + |
5736 | part_stat_read(&disk->part0, sectors[1]) - | 6038 | (int)part_stat_read(&disk->part0, sectors[1]) - |
5737 | atomic_read(&disk->sync_io); | 6039 | atomic_read(&disk->sync_io); |
5738 | /* sync IO will cause sync_io to increase before the disk_stats | 6040 | /* sync IO will cause sync_io to increase before the disk_stats |
5739 | * as sync_io is counted when a request starts, and | 6041 | * as sync_io is counted when a request starts, and |
5740 | * disk_stats is counted when it completes. | 6042 | * disk_stats is counted when it completes. |
@@ -5757,7 +6059,7 @@ static int is_mddev_idle(mddev_t *mddev) | |||
5757 | * always make curr_events less than last_events. | 6059 | * always make curr_events less than last_events. |
5758 | * | 6060 | * |
5759 | */ | 6061 | */ |
5760 | if (curr_events - rdev->last_events > 4096) { | 6062 | if (init || curr_events - rdev->last_events > 64) { |
5761 | rdev->last_events = curr_events; | 6063 | rdev->last_events = curr_events; |
5762 | idle = 0; | 6064 | idle = 0; |
5763 | } | 6065 | } |
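The idleness test samples each member's total transferred sectors, subtracts the sectors issued for resync, and calls the device busy if that figure advanced by more than a small slack since the last sample; the slack absorbs the skew described in the comment above, where sync_io is counted at request start but the part stats only at completion. A standalone sketch of the rule, with invented names:

#include <stdbool.h>

struct dev_sample {
	int last_events;	/* last observed non-sync event count */
};

/* curr is (sectors read + sectors written - sync sectors), sampled by
 * the caller.  Signed int arithmetic keeps the delta well-defined if
 * the underlying counters wrap.  init seeds the baseline so IO that
 * predates the resync is not counted against it.
 */
static bool dev_is_idle(struct dev_sample *d, int curr, bool init)
{
	if (init || curr - d->last_events > 64) {
		d->last_events = curr;	/* re-arm for the next sample */
		return false;
	}
	return true;
}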
@@ -5980,10 +6282,10 @@ void md_do_sync(mddev_t *mddev) | |||
5980 | j = mddev->recovery_cp; | 6282 | j = mddev->recovery_cp; |
5981 | 6283 | ||
5982 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 6284 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
5983 | max_sectors = mddev->size << 1; | 6285 | max_sectors = mddev->dev_sectors; |
5984 | else { | 6286 | else { |
5985 | /* recovery follows the physical size of devices */ | 6287 | /* recovery follows the physical size of devices */ |
5986 | max_sectors = mddev->size << 1; | 6288 | max_sectors = mddev->dev_sectors; |
5987 | j = MaxSector; | 6289 | j = MaxSector; |
5988 | list_for_each_entry(rdev, &mddev->disks, same_set) | 6290 | list_for_each_entry(rdev, &mddev->disks, same_set) |
5989 | if (rdev->raid_disk >= 0 && | 6291 | if (rdev->raid_disk >= 0 && |
@@ -6000,7 +6302,7 @@ void md_do_sync(mddev_t *mddev) | |||
6000 | "(but not more than %d KB/sec) for %s.\n", | 6302 | "(but not more than %d KB/sec) for %s.\n", |
6001 | speed_max(mddev), desc); | 6303 | speed_max(mddev), desc); |
6002 | 6304 | ||
6003 | is_mddev_idle(mddev); /* this also initializes IO event counters */ | 6305 | is_mddev_idle(mddev, 1); /* this initializes IO event counters */ |
6004 | 6306 | ||
6005 | io_sectors = 0; | 6307 | io_sectors = 0; |
6006 | for (m = 0; m < SYNC_MARKS; m++) { | 6308 | for (m = 0; m < SYNC_MARKS; m++) { |
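Calling the check once with init set, as above, only seeds last_events for every member; the return value is deliberately ignored, so the first real idle test is measured against IO issued after the resync began.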
@@ -6040,6 +6342,18 @@ void md_do_sync(mddev_t *mddev) | |||
6040 | } | 6342 | } |
6041 | if (kthread_should_stop()) | 6343 | if (kthread_should_stop()) |
6042 | goto interrupted; | 6344 | goto interrupted; |
6345 | |||
6346 | if (mddev->curr_resync > mddev->curr_resync_completed && | ||
6347 | (mddev->curr_resync - mddev->curr_resync_completed) | ||
6348 | > (max_sectors >> 4)) { | ||
6349 | /* time to update curr_resync_completed */ | ||
6350 | blk_unplug(mddev->queue); | ||
6351 | wait_event(mddev->recovery_wait, | ||
6352 | atomic_read(&mddev->recovery_active) == 0); | ||
6353 | mddev->curr_resync_completed = | ||
6354 | mddev->curr_resync; | ||
6355 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
6356 | } | ||
6043 | sectors = mddev->pers->sync_request(mddev, j, &skipped, | 6357 | sectors = mddev->pers->sync_request(mddev, j, &skipped, |
6044 | currspeed < speed_min(mddev)); | 6358 | currspeed < speed_min(mddev)); |
6045 | if (sectors == 0) { | 6359 | if (sectors == 0) { |
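The new block checkpoints resync progress at a fixed granularity: once the live position runs more than 1/16 of the whole range ahead of the last recorded one, outstanding resync IO is drained (so the recorded position is genuinely complete) and curr_resync_completed is published via a superblock update. In hypothetical standalone form, the trigger is just:

#include <stdbool.h>
#include <stdint.h>

/* Standalone form of the checkpoint trigger above (names invented). */
static bool should_checkpoint(uint64_t resync_pos,
			      uint64_t completed_pos,
			      uint64_t max_sectors)
{
	return resync_pos > completed_pos &&
	       (resync_pos - completed_pos) > (max_sectors >> 4);
}

For a 2147483648-sector (1 TiB) range that is a checkpoint every 134217728 sectors, i.e. every 64 GiB of progress, bounding how much work a crash can force the array to repeat.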
@@ -6102,7 +6416,7 @@ void md_do_sync(mddev_t *mddev) | |||
6102 | 6416 | ||
6103 | if (currspeed > speed_min(mddev)) { | 6417 | if (currspeed > speed_min(mddev)) { |
6104 | if ((currspeed > speed_max(mddev)) || | 6418 | if ((currspeed > speed_max(mddev)) || |
6105 | !is_mddev_idle(mddev)) { | 6419 | !is_mddev_idle(mddev, 0)) { |
6106 | msleep(500); | 6420 | msleep(500); |
6107 | goto repeat; | 6421 | goto repeat; |
6108 | } | 6422 | } |
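This is the whole throttle policy: below speed_min the resync never yields; above it, the loop backs off for 500 ms whenever it exceeds speed_max or, with the new flag, whenever the array is serving other IO. As a standalone predicate (names invented, speeds in KB/sec to match speed_min()/speed_max()):

#include <stdbool.h>

static bool should_throttle(int currspeed, int min_kbps, int max_kbps,
			    bool array_idle)
{
	if (currspeed <= min_kbps)
		return false;	/* guaranteed minimum resync rate */
	return currspeed > max_kbps || !array_idle;
}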
@@ -6173,6 +6487,8 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
6173 | mdk_rdev_t *rdev; | 6487 | mdk_rdev_t *rdev; |
6174 | int spares = 0; | 6488 | int spares = 0; |
6175 | 6489 | ||
6490 | mddev->curr_resync_completed = 0; | ||
6491 | |||
6176 | list_for_each_entry(rdev, &mddev->disks, same_set) | 6492 | list_for_each_entry(rdev, &mddev->disks, same_set) |
6177 | if (rdev->raid_disk >= 0 && | 6493 | if (rdev->raid_disk >= 0 && |
6178 | !test_bit(Blocked, &rdev->flags) && | 6494 | !test_bit(Blocked, &rdev->flags) && |
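Zeroing the checkpoint here means a recovery pass that is about to start cannot inherit a stale curr_resync_completed from an earlier run.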
@@ -6327,6 +6643,9 @@ void md_check_recovery(mddev_t *mddev) | |||
6327 | sysfs_notify(&mddev->kobj, NULL, | 6643 | sysfs_notify(&mddev->kobj, NULL, |
6328 | "degraded"); | 6644 | "degraded"); |
6329 | } | 6645 | } |
6646 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && | ||
6647 | mddev->pers->finish_reshape) | ||
6648 | mddev->pers->finish_reshape(mddev); | ||
6330 | md_update_sb(mddev, 1); | 6649 | md_update_sb(mddev, 1); |
6331 | 6650 | ||
6332 | /* if array is no-longer degraded, then any saved_raid_disk | 6651 | /* if array is no-longer degraded, then any saved_raid_disk |
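finish_reshape is an optional per-personality method, so it is guarded by a pointer test before the superblock write; this is the usual kernel pattern for optional ops-table methods. A sketch, with the ops struct shortened to the one member:

/* Sketch: optional method in an ops table, called only if provided. */
struct pers_ops {
	void (*finish_reshape)(mddev_t *mddev);	/* may be NULL */
};

static void reshape_done(mddev_t *mddev, const struct pers_ops *pers)
{
	if (pers->finish_reshape)
		pers->finish_reshape(mddev);
}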
@@ -6470,13 +6789,13 @@ static void md_geninit(void) | |||
6470 | 6789 | ||
6471 | static int __init md_init(void) | 6790 | static int __init md_init(void) |
6472 | { | 6791 | { |
6473 | if (register_blkdev(MAJOR_NR, "md")) | 6792 | if (register_blkdev(MD_MAJOR, "md")) |
6474 | return -1; | 6793 | return -1; |
6475 | if ((mdp_major=register_blkdev(0, "mdp"))<=0) { | 6794 | if ((mdp_major=register_blkdev(0, "mdp"))<=0) { |
6476 | unregister_blkdev(MAJOR_NR, "md"); | 6795 | unregister_blkdev(MD_MAJOR, "md"); |
6477 | return -1; | 6796 | return -1; |
6478 | } | 6797 | } |
6479 | blk_register_region(MKDEV(MAJOR_NR, 0), 1UL<<MINORBITS, THIS_MODULE, | 6798 | blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE, |
6480 | md_probe, NULL, NULL); | 6799 | md_probe, NULL, NULL); |
6481 | blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE, | 6800 | blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE, |
6482 | md_probe, NULL, NULL); | 6801 | md_probe, NULL, NULL); |
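register_blkdev() is used in both of its modes here: with a fixed major (MD_MAJOR) it returns 0 on success, while major 0 requests a dynamically allocated major, returned as a positive value. A condensed sketch of the pairing, with the error unwinding kept (invented function name, kernel context assumed):

/* Condensed from the init path above: claim a fixed major, then a
 * dynamic one, unwinding the first if the second fails.
 */
static int __init example_init(void)
{
	int dyn;

	if (register_blkdev(MD_MAJOR, "md"))	/* fixed: 0 == success */
		return -1;
	dyn = register_blkdev(0, "mdp");	/* dynamic: >0 == major */
	if (dyn <= 0) {
		unregister_blkdev(MD_MAJOR, "md");
		return -1;
	}
	return 0;
}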
@@ -6562,10 +6881,10 @@ static __exit void md_exit(void) | |||
6562 | mddev_t *mddev; | 6881 | mddev_t *mddev; |
6563 | struct list_head *tmp; | 6882 | struct list_head *tmp; |
6564 | 6883 | ||
6565 | blk_unregister_region(MKDEV(MAJOR_NR,0), 1U << MINORBITS); | 6884 | blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS); |
6566 | blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS); | 6885 | blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS); |
6567 | 6886 | ||
6568 | unregister_blkdev(MAJOR_NR,"md"); | 6887 | unregister_blkdev(MD_MAJOR,"md"); |
6569 | unregister_blkdev(mdp_major, "mdp"); | 6888 | unregister_blkdev(mdp_major, "mdp"); |
6570 | unregister_reboot_notifier(&md_notifier); | 6889 | unregister_reboot_notifier(&md_notifier); |
6571 | unregister_sysctl_table(raid_table_header); | 6890 | unregister_sysctl_table(raid_table_header); |