Diffstat (limited to 'drivers/md/md.c')
 drivers/md/md.c | 754 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 609 insertions(+), 145 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f19b874753a..8dbab2ef388 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -33,17 +33,16 @@
  */
 
 #include <linux/module.h>
-#include <linux/config.h>
 #include <linux/kthread.h>
 #include <linux/linkage.h>
 #include <linux/raid/md.h>
 #include <linux/raid/bitmap.h>
 #include <linux/sysctl.h>
-#include <linux/devfs_fs_kernel.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
 #include <linux/suspend.h>
 #include <linux/poll.h>
 #include <linux/mutex.h>
+#include <linux/ctype.h>
 
 #include <linux/init.h>
 
@@ -72,6 +71,10 @@ static void autostart_arrays (int part);
 static LIST_HEAD(pers_list);
 static DEFINE_SPINLOCK(pers_lock);
 
+static void md_print_devices(void);
+
+#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
+
 /*
  * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
  * is 1000 KB/sec, so the extra system load does not show up that much.
@@ -107,7 +110,7 @@ static ctl_table raid_table[] = {
 		.procname	= "speed_limit_min",
 		.data		= &sysctl_speed_limit_min,
 		.maxlen		= sizeof(int),
-		.mode		= 0644,
+		.mode		= S_IRUGO|S_IWUSR,
 		.proc_handler	= &proc_dointvec,
 	},
 	{
@@ -115,7 +118,7 @@ static ctl_table raid_table[] = {
 		.procname	= "speed_limit_max",
 		.data		= &sysctl_speed_limit_max,
 		.maxlen		= sizeof(int),
-		.mode		= 0644,
+		.mode		= S_IRUGO|S_IWUSR,
 		.proc_handler	= &proc_dointvec,
 	},
 	{ .ctl_name = 0 }
@@ -126,7 +129,7 @@ static ctl_table raid_dir_table[] = {
 		.ctl_name	= DEV_RAID,
 		.procname	= "raid",
 		.maxlen		= 0,
-		.mode		= 0555,
+		.mode		= S_IRUGO|S_IXUGO,
 		.child		= raid_table,
 	},
 	{ .ctl_name = 0 }
@@ -170,7 +173,7 @@ EXPORT_SYMBOL_GPL(md_new_event);
 /* Alternate version that can be called from interrupts
  * when calling sysfs_notify isn't needed.
  */
-void md_new_event_inintr(mddev_t *mddev)
+static void md_new_event_inintr(mddev_t *mddev)
 {
 	atomic_inc(&md_event_count);
 	wake_up(&md_event_waiters);
@@ -732,6 +735,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 {
 	mdp_disk_t *desc;
 	mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
+	__u64 ev1 = md_event(sb);
 
 	rdev->raid_disk = -1;
 	rdev->flags = 0;
@@ -748,7 +752,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->layout = sb->layout;
 		mddev->raid_disks = sb->raid_disks;
 		mddev->size = sb->size;
-		mddev->events = md_event(sb);
+		mddev->events = ev1;
 		mddev->bitmap_offset = 0;
 		mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
 
@@ -797,7 +801,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	} else if (mddev->pers == NULL) {
 		/* Insist on good event counter while assembling */
-		__u64 ev1 = md_event(sb);
 		++ev1;
 		if (ev1 < mddev->events)
 			return -EINVAL;
@@ -805,19 +808,21 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		/* if adding to array with a bitmap, then we can accept an
 		 * older device ... but not too old.
 		 */
-		__u64 ev1 = md_event(sb);
 		if (ev1 < mddev->bitmap->events_cleared)
 			return 0;
-	} else /* just a hot-add of a new device, leave raid_disk at -1 */
-		return 0;
+	} else {
+		if (ev1 < mddev->events)
+			/* just a hot-add of a new device, leave raid_disk at -1 */
+			return 0;
+	}
 
 	if (mddev->level != LEVEL_MULTIPATH) {
 		desc = sb->disks + rdev->desc_nr;
 
 		if (desc->state & (1<<MD_DISK_FAULTY))
 			set_bit(Faulty, &rdev->flags);
-		else if (desc->state & (1<<MD_DISK_SYNC) &&
-			 desc->raid_disk < mddev->raid_disks) {
+		else if (desc->state & (1<<MD_DISK_SYNC) /* &&
+			 desc->raid_disk < mddev->raid_disks */) {
			set_bit(In_sync, &rdev->flags);
 			rdev->raid_disk = desc->raid_disk;
 		}
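The two hunks above hoist the event-counter read into ev1 at the top of super_90_validate() and tighten the hot-add path: previously any device reaching the final else was accepted with raid_disk left at -1, whereas now only a device whose event count lags the array's is treated as a plain hot-add, and an up-to-date one falls through to the role-assignment code below. A user-space condensation of the acceptance rule (the struct and return conventions here are illustrative stand-ins for the kernel types, not part of the patch, and the initial-assembly branch that populates mddev from the superblock is ignored):

	#include <stdint.h>

	struct array_view {              /* hypothetical stand-in for mddev state */
		int assembled;           /* mddev->pers != NULL */
		int has_bitmap;          /* mddev->bitmap != NULL */
		uint64_t events;         /* mddev->events */
		uint64_t events_cleared; /* mddev->bitmap->events_cleared */
	};

	/* 1: device may take a role; 0: hot-add only (raid_disk stays -1);
	 * -1: reject while assembling.  Mirrors the checks above. */
	static int may_take_role(const struct array_view *a, uint64_t ev1)
	{
		if (!a->assembled)  /* assembling: insist on a good event counter */
			return (ev1 + 1 < a->events) ? -1 : 1;
		if (a->has_bitmap)  /* bitmap: accept an older device, not too old */
			return (ev1 < a->events_cleared) ? 0 : 1;
		return (ev1 < a->events) ? 0 : 1;   /* the new hot-add check */
	}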
@@ -1057,6 +1062,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 	if (rdev->sb_size & bmask)
 		rdev-> sb_size = (rdev->sb_size | bmask)+1;
 
+	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
+		rdev->desc_nr = -1;
+	else
+		rdev->desc_nr = le32_to_cpu(sb->dev_number);
+
 	if (refdev == 0)
 		ret = 1;
 	else {
@@ -1100,6 +1110,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 {
 	struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+	__u64 ev1 = le64_to_cpu(sb->events);
 
 	rdev->raid_disk = -1;
 	rdev->flags = 0;
@@ -1115,7 +1126,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->layout = le32_to_cpu(sb->layout);
 		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
 		mddev->size = le64_to_cpu(sb->size)/2;
-		mddev->events = le64_to_cpu(sb->events);
+		mddev->events = ev1;
 		mddev->bitmap_offset = 0;
 		mddev->default_bitmap_offset = 1024 >> 9;
 
@@ -1149,7 +1160,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	} else if (mddev->pers == NULL) {
 		/* Insist of good event counter while assembling */
-		__u64 ev1 = le64_to_cpu(sb->events);
 		++ev1;
 		if (ev1 < mddev->events)
 			return -EINVAL;
@@ -1157,15 +1167,15 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		/* If adding to array with a bitmap, then we can accept an
 		 * older device, but not too old.
 		 */
-		__u64 ev1 = le64_to_cpu(sb->events);
 		if (ev1 < mddev->bitmap->events_cleared)
 			return 0;
-	} else /* just a hot-add of a new device, leave raid_disk at -1 */
-		return 0;
-
+	} else {
+		if (ev1 < mddev->events)
+			/* just a hot-add of a new device, leave raid_disk at -1 */
+			return 0;
+	}
 	if (mddev->level != LEVEL_MULTIPATH) {
 		int role;
-		rdev->desc_nr = le32_to_cpu(sb->dev_number);
 		role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
 		switch(role) {
 		case 0xffff: /* spare */
@@ -1174,7 +1184,11 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 			set_bit(Faulty, &rdev->flags);
 			break;
 		default:
-			set_bit(In_sync, &rdev->flags);
+			if ((le32_to_cpu(sb->feature_map) &
+			     MD_FEATURE_RECOVERY_OFFSET))
+				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
+			else
+				set_bit(In_sync, &rdev->flags);
 			rdev->raid_disk = role;
 			break;
 		}
@@ -1198,6 +1212,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	sb->feature_map = 0;
 	sb->pad0 = 0;
+	sb->recovery_offset = cpu_to_le64(0);
 	memset(sb->pad1, 0, sizeof(sb->pad1));
 	memset(sb->pad2, 0, sizeof(sb->pad2));
 	memset(sb->pad3, 0, sizeof(sb->pad3));
@@ -1218,6 +1233,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
 		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
 	}
+
+	if (rdev->raid_disk >= 0 &&
+	    !test_bit(In_sync, &rdev->flags) &&
+	    rdev->recovery_offset > 0) {
+		sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
+		sb->recovery_offset = cpu_to_le64(rdev->recovery_offset);
+	}
+
 	if (mddev->reshape_position != MaxSector) {
 		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
 		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
@@ -1242,11 +1265,12 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 			sb->dev_roles[i] = cpu_to_le16(0xfffe);
 		else if (test_bit(In_sync, &rdev2->flags))
 			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
+		else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0)
+			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
 		else
 			sb->dev_roles[i] = cpu_to_le16(0xffff);
 	}
 
-	sb->recovery_offset = cpu_to_le64(0); /* not supported yet */
 	sb->sb_csum = calc_sb_1_csum(sb);
 }
 
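Together, the super_1_validate() and super_1_sync() hunks above retire the old "not supported yet" stub: a device that is still rebuilding now persists its progress in recovery_offset, guarded by the MD_FEATURE_RECOVERY_OFFSET feature bit, and keeps its slot in dev_roles[] instead of being demoted to a spare. A sketch of the resulting role encoding (struct rdev_view is an illustrative stand-in for mdk_rdev_t; the 0xffff/0xfffe values are the v1.x superblock conventions visible in the hunk above):

	#include <stdint.h>

	struct rdev_view {               /* illustrative stand-in for mdk_rdev_t */
		int faulty, in_sync, raid_disk;
		uint64_t recovery_offset;
	};

	static uint16_t role_for(const struct rdev_view *r)
	{
		if (r->faulty)
			return 0xfffe;          /* failed device */
		if (r->in_sync)
			return r->raid_disk;    /* fully synced member */
		if (r->raid_disk >= 0 && r->recovery_offset > 0)
			return r->raid_disk;    /* partially recovered: keep the slot */
		return 0xffff;                  /* spare */
	}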
@@ -1384,7 +1408,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
 	struct block_device *bdev;
 	char b[BDEVNAME_SIZE];
 
-	bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+	bdev = open_partition_by_devnum(dev, FMODE_READ|FMODE_WRITE);
 	if (IS_ERR(bdev)) {
 		printk(KERN_ERR "md: could not open %s.\n",
 			__bdevname(dev, b));
@@ -1394,7 +1418,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
 	if (err) {
 		printk(KERN_ERR "md: could not bd_claim %s.\n",
 			bdevname(bdev, b));
-		blkdev_put(bdev);
+		blkdev_put_partition(bdev);
 		return err;
 	}
 	rdev->bdev = bdev;
@@ -1408,7 +1432,7 @@ static void unlock_rdev(mdk_rdev_t *rdev)
 	if (!bdev)
 		MD_BUG();
 	bd_release(bdev);
-	blkdev_put(bdev);
+	blkdev_put_partition(bdev);
 }
 
 void md_autodetect_dev(dev_t dev);
@@ -1507,7 +1531,7 @@ static void print_rdev(mdk_rdev_t *rdev)
 		printk(KERN_INFO "md: no rdev superblock!\n");
 }
 
-void md_print_devices(void)
+static void md_print_devices(void)
 {
 	struct list_head *tmp, *tmp2;
 	mdk_rdev_t *rdev;
@@ -1536,15 +1560,30 @@ void md_print_devices(void)
 }
 
 
-static void sync_sbs(mddev_t * mddev)
+static void sync_sbs(mddev_t * mddev, int nospares)
 {
+	/* Update each superblock (in-memory image), but
+	 * if we are allowed to, skip spares which already
+	 * have the right event counter, or have one earlier
+	 * (which would mean they aren't being marked as dirty
+	 * with the rest of the array)
+	 */
 	mdk_rdev_t *rdev;
 	struct list_head *tmp;
 
 	ITERATE_RDEV(mddev,rdev,tmp) {
-		super_types[mddev->major_version].
-			sync_super(mddev, rdev);
-		rdev->sb_loaded = 1;
+		if (rdev->sb_events == mddev->events ||
+		    (nospares &&
+		     rdev->raid_disk < 0 &&
+		     (rdev->sb_events&1)==0 &&
+		     rdev->sb_events+1 == mddev->events)) {
+			/* Don't update this superblock */
+			rdev->sb_loaded = 2;
+		} else {
+			super_types[mddev->major_version].
+				sync_super(mddev, rdev);
+			rdev->sb_loaded = 1;
+		}
 	}
 }
 
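The rewritten loop may now leave a superblock untouched (sb_loaded = 2) instead of regenerating it. A small predicate capturing the skip rule, written against the even-means-clean convention that md_update_sb() introduces below (user-space sketch, not kernel code):

	#include <stdint.h>

	static int can_skip_sb_write(uint64_t sb_events, uint64_t mddev_events,
				     int raid_disk, int nospares)
	{
		if (sb_events == mddev_events)
			return 1;                      /* already up to date */
		return nospares &&
		       raid_disk < 0 &&                /* only spares */
		       (sb_events & 1) == 0 &&         /* last write was a clean (even) state */
		       sb_events + 1 == mddev_events;  /* exactly one transition behind */
	}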
@@ -1554,12 +1593,55 @@ void md_update_sb(mddev_t * mddev)
 	struct list_head *tmp;
 	mdk_rdev_t *rdev;
 	int sync_req;
+	int nospares = 0;
 
 repeat:
 	spin_lock_irq(&mddev->write_lock);
+
+	if (mddev->degraded && mddev->sb_dirty == 3)
+		/* If the array is degraded, then skipping spares is both
+		 * dangerous and fairly pointless.
+		 * Dangerous because a device that was removed from the array
+		 * might have a event_count that still looks up-to-date,
+		 * so it can be re-added without a resync.
+		 * Pointless because if there are any spares to skip,
+		 * then a recovery will happen and soon that array won't
+		 * be degraded any more and the spare can go back to sleep then.
+		 */
+		mddev->sb_dirty = 1;
+
 	sync_req = mddev->in_sync;
 	mddev->utime = get_seconds();
-	mddev->events ++;
+	if (mddev->sb_dirty == 3)
+		/* just a clean<-> dirty transition, possibly leave spares alone,
+		 * though if events isn't the right even/odd, we will have to do
+		 * spares after all
+		 */
+		nospares = 1;
+
+	/* If this is just a dirty<->clean transition, and the array is clean
+	 * and 'events' is odd, we can roll back to the previous clean state */
+	if (mddev->sb_dirty == 3
+	    && (mddev->in_sync && mddev->recovery_cp == MaxSector)
+	    && (mddev->events & 1))
+		mddev->events--;
+	else {
+		/* otherwise we have to go forward and ... */
+		mddev->events ++;
+		if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
+			/* .. if the array isn't clean, insist on an odd 'events' */
+			if ((mddev->events&1)==0) {
+				mddev->events++;
+				nospares = 0;
+			}
+		} else {
+			/* otherwise insist on an even 'events' (for clean states) */
+			if ((mddev->events&1)) {
+				mddev->events++;
+				nospares = 0;
+			}
+		}
+	}
 
 	if (!mddev->events) {
 		/*
@@ -1571,7 +1653,7 @@ repeat:
 		mddev->events --;
 	}
 	mddev->sb_dirty = 2;
-	sync_sbs(mddev);
+	sync_sbs(mddev, nospares);
 
 	/*
 	 * do not write anything to disk if using
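The block above establishes a parity convention on the event counter: even values mark clean states, odd values dirty ones. A clean array that bounces dirty and back can then step 100 -> 101 -> 100 instead of always moving forward, so spare superblocks still holding 100 never need rewriting. A toy model of the counter rule (simplified: the single clean flag stands for in_sync && recovery_cp == MaxSector):

	#include <stdint.h>

	static uint64_t next_events(uint64_t events, int sb_dirty, int clean,
				    int *nospares)
	{
		*nospares = (sb_dirty == 3);    /* plain clean<->dirty transition */
		if (sb_dirty == 3 && clean && (events & 1))
			return events - 1;      /* roll back to the prior clean state */
		events++;
		if ((clean && (events & 1)) || (!clean && !(events & 1))) {
			events++;               /* force the right parity ... */
			*nospares = 0;          /* ... and give up on skipping spares */
		}
		return events;
	}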
@@ -1593,6 +1675,8 @@ repeat:
 	ITERATE_RDEV(mddev,rdev,tmp) {
 		char b[BDEVNAME_SIZE];
 		dprintk(KERN_INFO "md: ");
+		if (rdev->sb_loaded != 1)
+			continue; /* no noise on spare devices */
 		if (test_bit(Faulty, &rdev->flags))
 			dprintk("(skipping faulty ");
 
@@ -1604,6 +1688,7 @@ repeat:
 			dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
 				bdevname(rdev->bdev,b),
 				(unsigned long long)rdev->sb_offset);
+			rdev->sb_events = mddev->events;
 
 		} else
 			dprintk(")\n");
@@ -1667,6 +1752,10 @@ state_show(mdk_rdev_t *rdev, char *page)
 		len += sprintf(page+len, "%sin_sync",sep);
 		sep = ",";
 	}
+	if (test_bit(WriteMostly, &rdev->flags)) {
+		len += sprintf(page+len, "%swrite_mostly",sep);
+		sep = ",";
+	}
 	if (!test_bit(Faulty, &rdev->flags) &&
 	    !test_bit(In_sync, &rdev->flags)) {
 		len += sprintf(page+len, "%sspare", sep);
@@ -1675,8 +1764,40 @@ state_show(mdk_rdev_t *rdev, char *page)
 	return len+sprintf(page+len, "\n");
 }
 
-static struct rdev_sysfs_entry
-rdev_state = __ATTR_RO(state);
+static ssize_t
+state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+	/* can write
+	 *  faulty  - simulates and error
+	 *  remove  - disconnects the device
+	 *  writemostly - sets write_mostly
+	 *  -writemostly - clears write_mostly
+	 */
+	int err = -EINVAL;
+	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
+		md_error(rdev->mddev, rdev);
+		err = 0;
+	} else if (cmd_match(buf, "remove")) {
+		if (rdev->raid_disk >= 0)
+			err = -EBUSY;
+		else {
+			mddev_t *mddev = rdev->mddev;
+			kick_rdev_from_array(rdev);
+			md_update_sb(mddev);
+			md_new_event(mddev);
+			err = 0;
+		}
+	} else if (cmd_match(buf, "writemostly")) {
+		set_bit(WriteMostly, &rdev->flags);
+		err = 0;
+	} else if (cmd_match(buf, "-writemostly")) {
+		clear_bit(WriteMostly, &rdev->flags);
+		err = 0;
+	}
+	return err ? err : len;
+}
+static struct rdev_sysfs_entry rdev_state =
+__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
 
 static ssize_t
 super_show(mdk_rdev_t *rdev, char *page)
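With this hunk the per-device state file becomes writable: writing faulty injects an error through md_error(), remove detaches a device that no longer holds a slot (raid_disk must be -1, otherwise -EBUSY), and writemostly/-writemostly toggle the WriteMostly flag. For a member of /dev/md0 the file typically lives at /sys/block/md0/md/dev-<name>/state; the exact directory name depends on how the rdev kobject was registered, so treat that path as illustrative rather than guaranteed.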
@@ -1707,7 +1828,7 @@ errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 	return -EINVAL;
 }
 static struct rdev_sysfs_entry rdev_errors =
-__ATTR(errors, 0644, errors_show, errors_store);
+__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
 
 static ssize_t
 slot_show(mdk_rdev_t *rdev, char *page)
@@ -1741,7 +1862,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 
 
 static struct rdev_sysfs_entry rdev_slot =
-__ATTR(slot, 0644, slot_show, slot_store);
+__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
 
 static ssize_t
 offset_show(mdk_rdev_t *rdev, char *page)
@@ -1763,7 +1884,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 }
 
 static struct rdev_sysfs_entry rdev_offset =
-__ATTR(offset, 0644, offset_show, offset_store);
+__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
 
 static ssize_t
 rdev_size_show(mdk_rdev_t *rdev, char *page)
@@ -1787,7 +1908,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 }
 
 static struct rdev_sysfs_entry rdev_size =
-__ATTR(size, 0644, rdev_size_show, rdev_size_store);
+__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
 
 static struct attribute *rdev_default_attrs[] = {
 	&rdev_state.attr,
@@ -1818,6 +1939,8 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	if (!entry->store)
 		return -EIO;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
 	return entry->store(rdev, page, length);
 }
 
@@ -1873,6 +1996,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
 	rdev->desc_nr = -1;
 	rdev->flags = 0;
 	rdev->data_offset = 0;
+	rdev->sb_events = 0;
 	atomic_set(&rdev->nr_pending, 0);
 	atomic_set(&rdev->read_errors, 0);
 	atomic_set(&rdev->corrected_errors, 0);
@@ -1978,6 +2102,54 @@ static void analyze_sbs(mddev_t * mddev)
 }
 
 static ssize_t
+safe_delay_show(mddev_t *mddev, char *page)
+{
+	int msec = (mddev->safemode_delay*1000)/HZ;
+	return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
+}
+static ssize_t
+safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
+{
+	int scale=1;
+	int dot=0;
+	int i;
+	unsigned long msec;
+	char buf[30];
+	char *e;
+	/* remove a period, and count digits after it */
+	if (len >= sizeof(buf))
+		return -EINVAL;
+	strlcpy(buf, cbuf, len);
+	buf[len] = 0;
+	for (i=0; i<len; i++) {
+		if (dot) {
+			if (isdigit(buf[i])) {
+				buf[i-1] = buf[i];
+				scale *= 10;
+			}
+			buf[i] = 0;
+		} else if (buf[i] == '.') {
+			dot=1;
+			buf[i] = 0;
+		}
+	}
+	msec = simple_strtoul(buf, &e, 10);
+	if (e == buf || (*e && *e != '\n'))
+		return -EINVAL;
+	msec = (msec * 1000) / scale;
+	if (msec == 0)
+		mddev->safemode_delay = 0;
+	else {
+		mddev->safemode_delay = (msec*HZ)/1000;
+		if (mddev->safemode_delay == 0)
+			mddev->safemode_delay = 1;
+	}
+	return len;
+}
+static struct md_sysfs_entry md_safe_delay =
+__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
+
+static ssize_t
 level_show(mddev_t *mddev, char *page)
 {
 	struct mdk_personality *p = mddev->pers;
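safe_mode_delay reads and writes seconds with millisecond resolution. Working the parser above through the input "0.200\n": the loop deletes the period, shifts each fractional digit left (buf ends up "0200") and accumulates scale = 1000, so msec = (200 * 1000) / 1000 = 200 and safemode_delay = (200 * HZ) / 1000 jiffies (50 jiffies at HZ=250). A bare "2" leaves scale = 1 and yields 2000 ms, and any nonzero value that would round to 0 jiffies is clamped to 1 so a short delay is never silently turned into "disabled".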
@@ -2010,7 +2182,33 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 }
 
 static struct md_sysfs_entry md_level =
-__ATTR(level, 0644, level_show, level_store);
+__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
+
+
+static ssize_t
+layout_show(mddev_t *mddev, char *page)
+{
+	/* just a number, not meaningful for all levels */
+	return sprintf(page, "%d\n", mddev->layout);
+}
+
+static ssize_t
+layout_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	char *e;
+	unsigned long n = simple_strtoul(buf, &e, 10);
+	if (mddev->pers)
+		return -EBUSY;
+
+	if (!*buf || (*e && *e != '\n'))
+		return -EINVAL;
+
+	mddev->layout = n;
+	return len;
+}
+static struct md_sysfs_entry md_layout =
+__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
+
 
 static ssize_t
 raid_disks_show(mddev_t *mddev, char *page)
@@ -2040,7 +2238,7 @@ raid_disks_store(mddev_t *mddev, const char *buf, size_t len)
 	return rv ? rv : len;
 }
 static struct md_sysfs_entry md_raid_disks =
-__ATTR(raid_disks, 0644, raid_disks_show, raid_disks_store);
+__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
 
 static ssize_t
 chunk_size_show(mddev_t *mddev, char *page)
@@ -2064,7 +2262,202 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
 	return len;
 }
 static struct md_sysfs_entry md_chunk_size =
-__ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store);
+__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
+
+static ssize_t
+resync_start_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
+}
+
+static ssize_t
+resync_start_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	/* can only set chunk_size if array is not yet active */
+	char *e;
+	unsigned long long n = simple_strtoull(buf, &e, 10);
+
+	if (mddev->pers)
+		return -EBUSY;
+	if (!*buf || (*e && *e != '\n'))
+		return -EINVAL;
+
+	mddev->recovery_cp = n;
+	return len;
+}
+static struct md_sysfs_entry md_resync_start =
+__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
+
+/*
+ * The array state can be:
+ *
+ *  clear
+ *     No devices, no size, no level
+ *     Equivalent to STOP_ARRAY ioctl
+ *  inactive
+ *     May have some settings, but array is not active
+ *        all IO results in error
+ *     When written, doesn't tear down array, but just stops it
+ *  suspended (not supported yet)
+ *     All IO requests will block. The array can be reconfigured.
+ *     Writing this, if accepted, will block until array is quiessent
+ *  readonly
+ *     no resync can happen.  no superblocks get written.
+ *     write requests fail
+ *  read-auto
+ *     like readonly, but behaves like 'clean' on a write request.
+ *
+ * clean - no pending writes, but otherwise active.
+ *     When written to inactive array, starts without resync
+ *     If a write request arrives then
+ *       if metadata is known, mark 'dirty' and switch to 'active'.
+ *       if not known, block and switch to write-pending
+ *     If written to an active array that has pending writes, then fails.
+ * active
+ *     fully active: IO and resync can be happening.
+ *     When written to inactive array, starts with resync
+ *
+ * write-pending
+ *     clean, but writes are blocked waiting for 'active' to be written.
+ *
+ * active-idle
+ *     like active, but no writes have been seen for a while (100msec).
+ *
+ */
+enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
+		   write_pending, active_idle, bad_word};
+static char *array_states[] = {
+	"clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
+	"write-pending", "active-idle", NULL };
+
+static int match_word(const char *word, char **list)
+{
+	int n;
+	for (n=0; list[n]; n++)
+		if (cmd_match(word, list[n]))
+			break;
+	return n;
+}
+
+static ssize_t
+array_state_show(mddev_t *mddev, char *page)
+{
+	enum array_state st = inactive;
+
+	if (mddev->pers)
+		switch(mddev->ro) {
+		case 1:
+			st = readonly;
+			break;
+		case 2:
+			st = read_auto;
+			break;
+		case 0:
+			if (mddev->in_sync)
+				st = clean;
+			else if (mddev->safemode)
+				st = active_idle;
+			else
+				st = active;
+		}
+	else {
+		if (list_empty(&mddev->disks) &&
+		    mddev->raid_disks == 0 &&
+		    mddev->size == 0)
+			st = clear;
+		else
+			st = inactive;
+	}
+	return sprintf(page, "%s\n", array_states[st]);
+}
+
+static int do_md_stop(mddev_t * mddev, int ro);
+static int do_md_run(mddev_t * mddev);
+static int restart_array(mddev_t *mddev);
+
+static ssize_t
+array_state_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	int err = -EINVAL;
+	enum array_state st = match_word(buf, array_states);
+	switch(st) {
+	case bad_word:
+		break;
+	case clear:
+		/* stopping an active array */
+		if (mddev->pers) {
+			if (atomic_read(&mddev->active) > 1)
+				return -EBUSY;
+			err = do_md_stop(mddev, 0);
+		}
+		break;
+	case inactive:
+		/* stopping an active array */
+		if (mddev->pers) {
+			if (atomic_read(&mddev->active) > 1)
+				return -EBUSY;
+			err = do_md_stop(mddev, 2);
+		}
+		break;
+	case suspended:
+		break; /* not supported yet */
+	case readonly:
+		if (mddev->pers)
+			err = do_md_stop(mddev, 1);
+		else {
+			mddev->ro = 1;
+			err = do_md_run(mddev);
+		}
+		break;
+	case read_auto:
+		/* stopping an active array */
+		if (mddev->pers) {
+			err = do_md_stop(mddev, 1);
+			if (err == 0)
+				mddev->ro = 2; /* FIXME mark devices writable */
+		} else {
+			mddev->ro = 2;
+			err = do_md_run(mddev);
+		}
+		break;
+	case clean:
+		if (mddev->pers) {
+			restart_array(mddev);
+			spin_lock_irq(&mddev->write_lock);
+			if (atomic_read(&mddev->writes_pending) == 0) {
+				mddev->in_sync = 1;
+				mddev->sb_dirty = 1;
+			}
+			spin_unlock_irq(&mddev->write_lock);
+		} else {
+			mddev->ro = 0;
+			mddev->recovery_cp = MaxSector;
+			err = do_md_run(mddev);
+		}
+		break;
+	case active:
+		if (mddev->pers) {
+			restart_array(mddev);
+			mddev->sb_dirty = 0;
+			wake_up(&mddev->sb_wait);
+			err = 0;
+		} else {
+			mddev->ro = 0;
+			err = do_md_run(mddev);
+		}
+		break;
+	case write_pending:
+	case active_idle:
+		/* these cannot be set */
+		break;
+	}
+	if (err)
+		return err;
+	else
+		return len;
+}
+static struct md_sysfs_entry md_array_state =
+__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
 
 static ssize_t
 null_show(mddev_t *mddev, char *page)
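array_state_store() leans on enum array_state and array_states[] declaring their entries in the same order, with the NULL slot doubling as bad_word. The cmd_match() helper it relies on (defined elsewhere in md.c, not shown in this diff) accepts the word with at most a trailing newline, which is how sysfs writes such as "clean\n" arrive. A user-space sketch of the lookup, with cmd_match approximated under that assumption:

	#include <string.h>

	static int cmd_match_sketch(const char *cmd, const char *str)
	{
		size_t n = strlen(str);
		if (strncmp(cmd, str, n) != 0)
			return 0;
		return cmd[n] == '\0' || (cmd[n] == '\n' && cmd[n + 1] == '\0');
	}

	static int match_word_sketch(const char *word, const char *const *list)
	{
		int n;
		for (n = 0; list[n]; n++)
			if (cmd_match_sketch(word, list[n]))
				break;
		return n;   /* index into the enum, or the NULL slot (bad_word) */
	}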
@@ -2124,7 +2517,7 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len)
 }
 
 static struct md_sysfs_entry md_new_device =
-__ATTR(new_dev, 0200, null_show, new_dev_store);
+__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
 
 static ssize_t
 size_show(mddev_t *mddev, char *page)
@@ -2162,7 +2555,7 @@ size_store(mddev_t *mddev, const char *buf, size_t len)
 }
 
 static struct md_sysfs_entry md_size =
-__ATTR(component_size, 0644, size_show, size_store);
+__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
 
 
 /* Metdata version.
@@ -2210,7 +2603,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
 }
 
 static struct md_sysfs_entry md_metadata =
-__ATTR(metadata_version, 0644, metadata_show, metadata_store);
+__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
 
 static ssize_t
 action_show(mddev_t *mddev, char *page)
@@ -2278,12 +2671,11 @@ mismatch_cnt_show(mddev_t *mddev, char *page)
 		       (unsigned long long) mddev->resync_mismatches);
 }
 
-static struct md_sysfs_entry
-md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
+static struct md_sysfs_entry md_scan_mode =
+__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
 
 
-static struct md_sysfs_entry
-md_mismatches = __ATTR_RO(mismatch_cnt);
+static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
 
 static ssize_t
 sync_min_show(mddev_t *mddev, char *page)
@@ -2342,15 +2734,14 @@ static ssize_t
 sync_speed_show(mddev_t *mddev, char *page)
 {
 	unsigned long resync, dt, db;
-	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
+	resync = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active));
 	dt = ((jiffies - mddev->resync_mark) / HZ);
 	if (!dt) dt++;
 	db = resync - (mddev->resync_mark_cnt);
 	return sprintf(page, "%ld\n", db/dt/2); /* K/sec */
 }
 
-static struct md_sysfs_entry
-md_sync_speed = __ATTR_RO(sync_speed);
+static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
 
 static ssize_t
 sync_completed_show(mddev_t *mddev, char *page)
@@ -2366,8 +2757,7 @@ sync_completed_show(mddev_t *mddev, char *page)
 	return sprintf(page, "%lu / %lu\n", resync, max_blocks);
 }
 
-static struct md_sysfs_entry
-md_sync_completed = __ATTR_RO(sync_completed);
+static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
 
 static ssize_t
 suspend_lo_show(mddev_t *mddev, char *page)
@@ -2428,11 +2818,15 @@ __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
 
 static struct attribute *md_default_attrs[] = {
 	&md_level.attr,
+	&md_layout.attr,
 	&md_raid_disks.attr,
 	&md_chunk_size.attr,
 	&md_size.attr,
+	&md_resync_start.attr,
 	&md_metadata.attr,
 	&md_new_device.attr,
+	&md_safe_delay.attr,
+	&md_array_state.attr,
 	NULL,
 };
 
@@ -2480,6 +2874,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	if (!entry->store)
 		return -EIO;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
 	rv = mddev_lock(mddev);
 	if (!rv) {
 		rv = entry->store(mddev, page, length);
@@ -2532,13 +2928,10 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
 	}
 	disk->major = MAJOR(dev);
 	disk->first_minor = unit << shift;
-	if (partitioned) {
+	if (partitioned)
 		sprintf(disk->disk_name, "md_d%d", unit);
-		sprintf(disk->devfs_name, "md/d%d", unit);
-	} else {
+	else
 		sprintf(disk->disk_name, "md%d", unit);
-		sprintf(disk->devfs_name, "md/%d", unit);
-	}
 	disk->fops = &md_fops;
 	disk->private_data = mddev;
 	disk->queue = mddev->queue;
@@ -2553,8 +2946,6 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
 	return NULL;
 }
 
-void md_wakeup_thread(mdk_thread_t *thread);
-
 static void md_safemode_timeout(unsigned long data)
 {
 	mddev_t *mddev = (mddev_t *) data;
@@ -2708,7 +3099,7 @@ static int do_md_run(mddev_t * mddev)
 	mddev->safemode = 0;
 	mddev->safemode_timer.function = md_safemode_timeout;
 	mddev->safemode_timer.data = (unsigned long) mddev;
-	mddev->safemode_delay = (20 * HZ)/1000 +1; /* 20 msec delay */
+	mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
 	mddev->in_sync = 1;
 
 	ITERATE_RDEV(mddev,rdev,tmp)
@@ -2719,7 +3110,6 @@ static int do_md_run(mddev_t * mddev)
 	}
 
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-	md_wakeup_thread(mddev->thread);
 
 	if (mddev->sb_dirty)
 		md_update_sb(mddev);
@@ -2736,6 +3126,37 @@ static int do_md_run(mddev_t * mddev)
 	mddev->queue->queuedata = mddev;
 	mddev->queue->make_request_fn = mddev->pers->make_request;
 
+	/* If there is a partially-recovered drive we need to
+	 * start recovery here.  If we leave it to md_check_recovery,
+	 * it will remove the drives and not do the right thing
+	 */
+	if (mddev->degraded && !mddev->sync_thread) {
+		struct list_head *rtmp;
+		int spares = 0;
+		ITERATE_RDEV(mddev,rdev,rtmp)
+			if (rdev->raid_disk >= 0 &&
+			    !test_bit(In_sync, &rdev->flags) &&
+			    !test_bit(Faulty, &rdev->flags))
+				/* complete an interrupted recovery */
+				spares++;
+		if (spares && mddev->pers->sync_request) {
+			mddev->recovery = 0;
+			set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+			mddev->sync_thread = md_register_thread(md_do_sync,
+								mddev,
+								"%s_resync");
+			if (!mddev->sync_thread) {
+				printk(KERN_ERR "%s: could not start resync"
+				       " thread...\n",
+				       mdname(mddev));
+				/* leave the spares where they are, it shouldn't hurt */
+				mddev->recovery = 0;
+			}
+		}
+	}
+	md_wakeup_thread(mddev->thread);
+	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
+
 	mddev->changed = 1;
 	md_new_event(mddev);
 	return 0;
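The in-code comment is the key to this hunk: left to md_check_recovery(), a device that is bound to a slot but neither In_sync nor Faulty would be removed rather than resumed, so do_md_run() now starts the resync thread itself whenever a degraded array contains such a partially-recovered member. Note also the ordering change: the md_wakeup_thread(mddev->thread) call that used to sit right after set_bit(MD_RECOVERY_NEEDED, ...) moved down here so it runs after the sync thread (if any) is registered, and waking mddev->sync_thread can additionally kick off a pending reshape.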
@@ -2769,18 +3190,47 @@ static int restart_array(mddev_t *mddev)
 		 */
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
+		md_wakeup_thread(mddev->sync_thread);
 		err = 0;
-	} else {
-		printk(KERN_ERR "md: %s has no personality assigned.\n",
-			mdname(mddev));
+	} else
 		err = -EINVAL;
-	}
 
 out:
 	return err;
 }
 
-static int do_md_stop(mddev_t * mddev, int ro)
+/* similar to deny_write_access, but accounts for our holding a reference
+ * to the file ourselves */
+static int deny_bitmap_write_access(struct file * file)
+{
+	struct inode *inode = file->f_mapping->host;
+
+	spin_lock(&inode->i_lock);
+	if (atomic_read(&inode->i_writecount) > 1) {
+		spin_unlock(&inode->i_lock);
+		return -ETXTBSY;
+	}
+	atomic_set(&inode->i_writecount, -1);
+	spin_unlock(&inode->i_lock);
+
+	return 0;
+}
+
+static void restore_bitmap_write_access(struct file *file)
+{
+	struct inode *inode = file->f_mapping->host;
+
+	spin_lock(&inode->i_lock);
+	atomic_set(&inode->i_writecount, 1);
+	spin_unlock(&inode->i_lock);
+}
+
+/* mode:
+ *   0 - completely stop and dis-assemble array
+ *   1 - switch to readonly
+ *   2 - stop but do not disassemble array
+ */
+static int do_md_stop(mddev_t * mddev, int mode)
 {
 	int err = 0;
 	struct gendisk *disk = mddev->gendisk;
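deny_bitmap_write_access() moved up here (its old definition is deleted further down in this diff) and gained the restore_bitmap_write_access() counterpart used by do_md_stop() and set_bitmap_file(). The mechanism is the one deny_write_access() uses for -ETXTBSY: i_writecount counts writers while positive and means "writes denied" when negative, and since md itself holds one reference to the bitmap file, a count of exactly 1 means "only us". A compare-and-swap model of the same gate (illustrative user-space code; the kernel version serializes on inode->i_lock instead):

	#include <stdatomic.h>

	/* i_writecount > 0: writers; -1: writes denied.  We hold one
	 * reference ourselves, so only a count of exactly 1 may be
	 * flipped into the deny state. */
	static int deny_writes(atomic_int *i_writecount)
	{
		int only_us = 1;
		return atomic_compare_exchange_strong(i_writecount, &only_us, -1)
			? 0 : -1;   /* -1 models -ETXTBSY */
	}

	static void restore_writes(atomic_int *i_writecount)
	{
		atomic_store(i_writecount, 1);
	}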
@@ -2792,6 +3242,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
 	}
 
 	if (mddev->sync_thread) {
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		md_unregister_thread(mddev->sync_thread);
 		mddev->sync_thread = NULL;
@@ -2801,12 +3252,15 @@ static int do_md_stop(mddev_t * mddev, int ro)
 
 	invalidate_partition(disk, 0);
 
-	if (ro) {
+	switch(mode) {
+	case 1: /* readonly */
 		err = -ENXIO;
 		if (mddev->ro==1)
 			goto out;
 		mddev->ro = 1;
-	} else {
+		break;
+	case 0: /* disassemble */
+	case 2: /* stop */
 		bitmap_flush(mddev);
 		md_super_wait(mddev);
 		if (mddev->ro)
@@ -2821,19 +3275,20 @@ static int do_md_stop(mddev_t * mddev, int ro)
 			if (mddev->ro)
 				mddev->ro = 0;
 		}
-		if (!mddev->in_sync) {
+		if (!mddev->in_sync || mddev->sb_dirty) {
 			/* mark array as shutdown cleanly */
 			mddev->in_sync = 1;
 			md_update_sb(mddev);
 		}
-		if (ro)
+		if (mode == 1)
 			set_disk_ro(disk, 1);
+		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 	}
 
 	/*
 	 * Free resources if final stop
 	 */
-	if (!ro) {
+	if (mode == 0) {
 		mdk_rdev_t *rdev;
 		struct list_head *tmp;
 		struct gendisk *disk;
@@ -2841,7 +3296,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
 
 		bitmap_destroy(mddev);
 		if (mddev->bitmap_file) {
-			atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1);
+			restore_bitmap_write_access(mddev->bitmap_file);
 			fput(mddev->bitmap_file);
 			mddev->bitmap_file = NULL;
 		}
@@ -2857,11 +3312,15 @@ static int do_md_stop(mddev_t * mddev, int ro)
 		export_array(mddev);
 
 		mddev->array_size = 0;
+		mddev->size = 0;
+		mddev->raid_disks = 0;
+		mddev->recovery_cp = 0;
+
 		disk = mddev->gendisk;
 		if (disk)
 			set_capacity(disk, 0);
 		mddev->changed = 1;
-	} else
+	} else if (mddev->pers)
 		printk(KERN_INFO "md: %s switched to read-only mode.\n",
 			mdname(mddev));
 	err = 0;
@@ -3264,6 +3723,17 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 
 		rdev->raid_disk = -1;
 		err = bind_rdev_to_array(rdev, mddev);
+		if (!err && !mddev->pers->hot_remove_disk) {
+			/* If there is hot_add_disk but no hot_remove_disk
+			 * then added disks for geometry changes,
+			 * and should be added immediately.
+			 */
+			super_types[mddev->major_version].
+				validate_super(mddev, rdev);
+			err = mddev->pers->hot_add_disk(mddev, rdev);
+			if (err)
+				unbind_rdev_from_array(rdev);
+		}
 		if (err)
 			export_rdev(rdev);
 
@@ -3434,23 +3904,6 @@ abort_export:
 	return err;
 }
 
-/* similar to deny_write_access, but accounts for our holding a reference
- * to the file ourselves */
-static int deny_bitmap_write_access(struct file * file)
-{
-	struct inode *inode = file->f_mapping->host;
-
-	spin_lock(&inode->i_lock);
-	if (atomic_read(&inode->i_writecount) > 1) {
-		spin_unlock(&inode->i_lock);
-		return -ETXTBSY;
-	}
-	atomic_set(&inode->i_writecount, -1);
-	spin_unlock(&inode->i_lock);
-
-	return 0;
-}
-
 static int set_bitmap_file(mddev_t *mddev, int fd)
 {
 	int err;
@@ -3491,12 +3944,17 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
 		mddev->pers->quiesce(mddev, 1);
 		if (fd >= 0)
 			err = bitmap_create(mddev);
-		if (fd < 0 || err)
+		if (fd < 0 || err) {
 			bitmap_destroy(mddev);
+			fd = -1; /* make sure to put the file */
+		}
 		mddev->pers->quiesce(mddev, 0);
-	} else if (fd < 0) {
-		if (mddev->bitmap_file)
+	}
+	if (fd < 0) {
+		if (mddev->bitmap_file) {
+			restore_bitmap_write_access(mddev->bitmap_file);
 			fput(mddev->bitmap_file);
+		}
 		mddev->bitmap_file = NULL;
 	}
 
@@ -3977,11 +4435,6 @@ static int md_ioctl(struct inode *inode, struct file *file,
 			goto done_unlock;
 
 		default:
-			if (_IOC_TYPE(cmd) == MD_MAJOR)
-				printk(KERN_WARNING "md: %s(pid %d) used"
-					" obsolete MD ioctl, upgrade your"
-					" software to use new ictls.\n",
-					current->comm, current->pid);
 			err = -EINVAL;
 			goto abort_unlock;
 	}
@@ -4152,6 +4605,8 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
 		__builtin_return_address(0),__builtin_return_address(1),
 		__builtin_return_address(2),__builtin_return_address(3));
 */
+	if (!mddev->pers)
+		return;
 	if (!mddev->pers->error_handler)
 		return;
 	mddev->pers->error_handler(mddev,rdev);
@@ -4249,12 +4704,13 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
 	 */
 	dt = ((jiffies - mddev->resync_mark) / HZ);
 	if (!dt) dt++;
-	db = resync - (mddev->resync_mark_cnt/2);
-	rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100;
+	db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
+		- mddev->resync_mark_cnt;
+	rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100;
 
 	seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
 
-	seq_printf(seq, " speed=%ldK/sec", db/dt);
+	seq_printf(seq, " speed=%ldK/sec", db/2/dt);
 }
 
 static void *md_seq_start(struct seq_file *seq, loff_t *pos)
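Both status_resync() and sync_speed_show() now measure progress in raw 512-byte sectors (curr_mark_cnt minus the in-flight recovery_active) rather than 1K blocks, so the /2 converts sectors to KB only at print time. For example, 20480 sectors moved in dt = 10 seconds prints speed = 20480/2/10 = 1024K/sec, and the ETA divisor db/2/100+1 keeps the finish estimate in the same block units as before.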
@@ -4586,7 +5042,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
 	spin_lock_irq(&mddev->write_lock);
 	if (mddev->in_sync) {
 		mddev->in_sync = 0;
-		mddev->sb_dirty = 1;
+		mddev->sb_dirty = 3;
 		md_wakeup_thread(mddev->thread);
 	}
 	spin_unlock_irq(&mddev->write_lock);
@@ -4599,7 +5055,7 @@ void md_write_end(mddev_t *mddev)
 	if (atomic_dec_and_test(&mddev->writes_pending)) {
 		if (mddev->safemode == 2)
 			md_wakeup_thread(mddev->thread);
-		else
+		else if (mddev->safemode_delay)
 			mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
 	}
 }
@@ -4620,10 +5076,14 @@ void md_do_sync(mddev_t *mddev)
 	struct list_head *tmp;
 	sector_t last_check;
 	int skipped = 0;
+	struct list_head *rtmp;
+	mdk_rdev_t *rdev;
 
 	/* just incase thread restarts... */
 	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
 		return;
+	if (mddev->ro) /* never try to sync a read-only array */
+		return;
 
 	/* we overload curr_resync somewhat here.
 	 * 0 == not engaged in resync at all
@@ -4682,17 +5142,30 @@ void md_do_sync(mddev_t *mddev)
 		}
 	} while (mddev->curr_resync < 2);
 
+	j = 0;
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 		/* resync follows the size requested by the personality,
 		 * which defaults to physical size, but can be virtual size
 		 */
 		max_sectors = mddev->resync_max_sectors;
 		mddev->resync_mismatches = 0;
+		/* we don't use the checkpoint if there's a bitmap */
+		if (!mddev->bitmap &&
+		    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+			j = mddev->recovery_cp;
 	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 		max_sectors = mddev->size << 1;
-	else
+	else {
 		/* recovery follows the physical size of devices */
 		max_sectors = mddev->size << 1;
+		j = MaxSector;
+		ITERATE_RDEV(mddev,rdev,rtmp)
+			if (rdev->raid_disk >= 0 &&
+			    !test_bit(Faulty, &rdev->flags) &&
+			    !test_bit(In_sync, &rdev->flags) &&
+			    rdev->recovery_offset < j)
+				j = rdev->recovery_offset;
+	}
 
 	printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
 	printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
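For plain recovery the starting sector j is no longer pinned to 0: it becomes the minimum recovery_offset across members still being rebuilt, so an interrupted rebuild resumes where it stopped. A freshly added spare has recovery_offset 0 (md_check_recovery() resets it below) and therefore still forces a full pass. In sketch form, with MaxSector modeled as all-ones to match the kernel's definition:

	#include <stdint.h>

	#define MAX_SECTOR (~(uint64_t)0)   /* stand-in for the kernel's MaxSector */

	static uint64_t recovery_start(const uint64_t *recovery_offset,
				       const int *rebuilding, int ndisks)
	{
		uint64_t j = MAX_SECTOR;
		for (int i = 0; i < ndisks; i++)
			if (rebuilding[i] && recovery_offset[i] < j)
				j = recovery_offset[i];
		return j;   /* MAX_SECTOR if nothing needs rebuilding */
	}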
@@ -4702,12 +5175,7 @@ void md_do_sync(mddev_t *mddev)
 		       speed_max(mddev));
 
 	is_mddev_idle(mddev); /* this also initializes IO event counters */
-	/* we don't use the checkpoint if there's a bitmap */
-	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap
-	    && ! test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
-		j = mddev->recovery_cp;
-	else
-		j = 0;
+
 	io_sectors = 0;
 	for (m = 0; m < SYNC_MARKS; m++) {
 		mark[m] = jiffies;
@@ -4753,6 +5221,7 @@ void md_do_sync(mddev_t *mddev)
 
 		j += sectors;
 		if (j>1) mddev->curr_resync = j;
+		mddev->curr_mark_cnt = io_sectors;
 		if (last_check == 0)
 			/* this is the earliers that rebuilt will be
 			 * visible in /proc/mdstat
@@ -4828,15 +5297,28 @@ void md_do_sync(mddev_t *mddev)
 	if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
 	    test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
-	    mddev->curr_resync > 2 &&
-	    mddev->curr_resync >= mddev->recovery_cp) {
-		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
-			printk(KERN_INFO
-			       "md: checkpointing recovery of %s.\n",
-			       mdname(mddev));
-			mddev->recovery_cp = mddev->curr_resync;
-		} else
-			mddev->recovery_cp = MaxSector;
+	    mddev->curr_resync > 2) {
+		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+				if (mddev->curr_resync >= mddev->recovery_cp) {
+					printk(KERN_INFO
+					       "md: checkpointing recovery of %s.\n",
+					       mdname(mddev));
+					mddev->recovery_cp = mddev->curr_resync;
+				}
+			} else
+				mddev->recovery_cp = MaxSector;
+		} else {
+			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+				mddev->curr_resync = MaxSector;
+			ITERATE_RDEV(mddev,rdev,rtmp)
+				if (rdev->raid_disk >= 0 &&
+				    !test_bit(Faulty, &rdev->flags) &&
+				    !test_bit(In_sync, &rdev->flags) &&
+				    rdev->recovery_offset < mddev->curr_resync)
+					rdev->recovery_offset = mddev->curr_resync;
+			mddev->sb_dirty = 1;
+		}
 	}
 
  skip:
@@ -4908,7 +5390,7 @@ void md_check_recovery(mddev_t *mddev)
 		if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
 		    !mddev->in_sync && mddev->recovery_cp == MaxSector) {
 			mddev->in_sync = 1;
-			mddev->sb_dirty = 1;
+			mddev->sb_dirty = 3;
 		}
 		if (mddev->safemode == 1)
 			mddev->safemode = 0;
@@ -4957,6 +5439,8 @@ void md_check_recovery(mddev_t *mddev)
 		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 
+		if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
+			goto unlock;
 		/* no recovery is running.
 		 * remove any failed drives, then
 		 * add spares if possible.
@@ -4979,6 +5463,7 @@ void md_check_recovery(mddev_t *mddev)
 		ITERATE_RDEV(mddev,rdev,rtmp)
 			if (rdev->raid_disk < 0
 			    && !test_bit(Faulty, &rdev->flags)) {
+				rdev->recovery_offset = 0;
 				if (mddev->pers->hot_add_disk(mddev,rdev)) {
 					char nm[20];
 					sprintf(nm, "rd%d", rdev->raid_disk);
@@ -5071,8 +5556,6 @@ static void md_geninit(void)
 
 static int __init md_init(void)
 {
-	int minor;
-
 	printk(KERN_INFO "md: md driver %d.%d.%d MAX_MD_DEVS=%d,"
 			" MD_SB_DISKS=%d\n",
 			MD_MAJOR_VERSION, MD_MINOR_VERSION,
@@ -5086,23 +5569,11 @@ static int __init md_init(void)
 			unregister_blkdev(MAJOR_NR, "md");
 		return -1;
 	}
-	devfs_mk_dir("md");
 	blk_register_region(MKDEV(MAJOR_NR, 0), MAX_MD_DEVS, THIS_MODULE,
 				md_probe, NULL, NULL);
 	blk_register_region(MKDEV(mdp_major, 0), MAX_MD_DEVS<<MdpMinorShift, THIS_MODULE,
 				md_probe, NULL, NULL);
 
-	for (minor=0; minor < MAX_MD_DEVS; ++minor)
-		devfs_mk_bdev(MKDEV(MAJOR_NR, minor),
-				S_IFBLK|S_IRUSR|S_IWUSR,
-				"md/%d", minor);
-
-	for (minor=0; minor < MAX_MD_DEVS; ++minor)
-		devfs_mk_bdev(MKDEV(mdp_major, minor<<MdpMinorShift),
-				S_IFBLK|S_IRUSR|S_IWUSR,
-				"md/mdp%d", minor);
-
-
 	register_reboot_notifier(&md_notifier);
 	raid_table_header = register_sysctl_table(raid_root_table, 1);
 
@@ -5158,15 +5629,9 @@ static __exit void md_exit(void)
 {
 	mddev_t *mddev;
 	struct list_head *tmp;
-	int i;
+
 	blk_unregister_region(MKDEV(MAJOR_NR,0), MAX_MD_DEVS);
 	blk_unregister_region(MKDEV(mdp_major,0), MAX_MD_DEVS << MdpMinorShift);
-	for (i=0; i < MAX_MD_DEVS; i++)
-		devfs_remove("md/%d", i);
-	for (i=0; i < MAX_MD_DEVS; i++)
-		devfs_remove("md/d%d", i);
-
-	devfs_remove("md");
 
 	unregister_blkdev(MAJOR_NR,"md");
 	unregister_blkdev(mdp_major, "mdp");
@@ -5203,8 +5668,8 @@ static int set_ro(const char *val, struct kernel_param *kp)
 	return -EINVAL;
 }
 
-module_param_call(start_ro, set_ro, get_ro, NULL, 0600);
-module_param(start_dirty_degraded, int, 0644);
+module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
+module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
 
 
 EXPORT_SYMBOL(register_md_personality);
@@ -5216,7 +5681,6 @@ EXPORT_SYMBOL(md_write_end);
 EXPORT_SYMBOL(md_register_thread);
 EXPORT_SYMBOL(md_unregister_thread);
 EXPORT_SYMBOL(md_wakeup_thread);
-EXPORT_SYMBOL(md_print_devices);
 EXPORT_SYMBOL(md_check_recovery);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md");