diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 754 |
1 files changed, 609 insertions, 145 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index f19b874753a..8dbab2ef388 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -33,17 +33,16 @@ | |||
33 | */ | 33 | */ |
34 | 34 | ||
35 | #include <linux/module.h> | 35 | #include <linux/module.h> |
36 | #include <linux/config.h> | ||
37 | #include <linux/kthread.h> | 36 | #include <linux/kthread.h> |
38 | #include <linux/linkage.h> | 37 | #include <linux/linkage.h> |
39 | #include <linux/raid/md.h> | 38 | #include <linux/raid/md.h> |
40 | #include <linux/raid/bitmap.h> | 39 | #include <linux/raid/bitmap.h> |
41 | #include <linux/sysctl.h> | 40 | #include <linux/sysctl.h> |
42 | #include <linux/devfs_fs_kernel.h> | ||
43 | #include <linux/buffer_head.h> /* for invalidate_bdev */ | 41 | #include <linux/buffer_head.h> /* for invalidate_bdev */ |
44 | #include <linux/suspend.h> | 42 | #include <linux/suspend.h> |
45 | #include <linux/poll.h> | 43 | #include <linux/poll.h> |
46 | #include <linux/mutex.h> | 44 | #include <linux/mutex.h> |
45 | #include <linux/ctype.h> | ||
47 | 46 | ||
48 | #include <linux/init.h> | 47 | #include <linux/init.h> |
49 | 48 | ||
@@ -72,6 +71,10 @@ static void autostart_arrays (int part); | |||
72 | static LIST_HEAD(pers_list); | 71 | static LIST_HEAD(pers_list); |
73 | static DEFINE_SPINLOCK(pers_lock); | 72 | static DEFINE_SPINLOCK(pers_lock); |
74 | 73 | ||
74 | static void md_print_devices(void); | ||
75 | |||
76 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } | ||
77 | |||
75 | /* | 78 | /* |
76 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' | 79 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' |
77 | * is 1000 KB/sec, so the extra system load does not show up that much. | 80 | * is 1000 KB/sec, so the extra system load does not show up that much. |
@@ -107,7 +110,7 @@ static ctl_table raid_table[] = { | |||
107 | .procname = "speed_limit_min", | 110 | .procname = "speed_limit_min", |
108 | .data = &sysctl_speed_limit_min, | 111 | .data = &sysctl_speed_limit_min, |
109 | .maxlen = sizeof(int), | 112 | .maxlen = sizeof(int), |
110 | .mode = 0644, | 113 | .mode = S_IRUGO|S_IWUSR, |
111 | .proc_handler = &proc_dointvec, | 114 | .proc_handler = &proc_dointvec, |
112 | }, | 115 | }, |
113 | { | 116 | { |
@@ -115,7 +118,7 @@ static ctl_table raid_table[] = { | |||
115 | .procname = "speed_limit_max", | 118 | .procname = "speed_limit_max", |
116 | .data = &sysctl_speed_limit_max, | 119 | .data = &sysctl_speed_limit_max, |
117 | .maxlen = sizeof(int), | 120 | .maxlen = sizeof(int), |
118 | .mode = 0644, | 121 | .mode = S_IRUGO|S_IWUSR, |
119 | .proc_handler = &proc_dointvec, | 122 | .proc_handler = &proc_dointvec, |
120 | }, | 123 | }, |
121 | { .ctl_name = 0 } | 124 | { .ctl_name = 0 } |
@@ -126,7 +129,7 @@ static ctl_table raid_dir_table[] = { | |||
126 | .ctl_name = DEV_RAID, | 129 | .ctl_name = DEV_RAID, |
127 | .procname = "raid", | 130 | .procname = "raid", |
128 | .maxlen = 0, | 131 | .maxlen = 0, |
129 | .mode = 0555, | 132 | .mode = S_IRUGO|S_IXUGO, |
130 | .child = raid_table, | 133 | .child = raid_table, |
131 | }, | 134 | }, |
132 | { .ctl_name = 0 } | 135 | { .ctl_name = 0 } |
@@ -170,7 +173,7 @@ EXPORT_SYMBOL_GPL(md_new_event); | |||
170 | /* Alternate version that can be called from interrupts | 173 | /* Alternate version that can be called from interrupts |
171 | * when calling sysfs_notify isn't needed. | 174 | * when calling sysfs_notify isn't needed. |
172 | */ | 175 | */ |
173 | void md_new_event_inintr(mddev_t *mddev) | 176 | static void md_new_event_inintr(mddev_t *mddev) |
174 | { | 177 | { |
175 | atomic_inc(&md_event_count); | 178 | atomic_inc(&md_event_count); |
176 | wake_up(&md_event_waiters); | 179 | wake_up(&md_event_waiters); |
@@ -732,6 +735,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
732 | { | 735 | { |
733 | mdp_disk_t *desc; | 736 | mdp_disk_t *desc; |
734 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); | 737 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); |
738 | __u64 ev1 = md_event(sb); | ||
735 | 739 | ||
736 | rdev->raid_disk = -1; | 740 | rdev->raid_disk = -1; |
737 | rdev->flags = 0; | 741 | rdev->flags = 0; |
@@ -748,7 +752,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
748 | mddev->layout = sb->layout; | 752 | mddev->layout = sb->layout; |
749 | mddev->raid_disks = sb->raid_disks; | 753 | mddev->raid_disks = sb->raid_disks; |
750 | mddev->size = sb->size; | 754 | mddev->size = sb->size; |
751 | mddev->events = md_event(sb); | 755 | mddev->events = ev1; |
752 | mddev->bitmap_offset = 0; | 756 | mddev->bitmap_offset = 0; |
753 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 757 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; |
754 | 758 | ||
@@ -797,7 +801,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
797 | 801 | ||
798 | } else if (mddev->pers == NULL) { | 802 | } else if (mddev->pers == NULL) { |
799 | /* Insist on good event counter while assembling */ | 803 | /* Insist on good event counter while assembling */ |
800 | __u64 ev1 = md_event(sb); | ||
801 | ++ev1; | 804 | ++ev1; |
802 | if (ev1 < mddev->events) | 805 | if (ev1 < mddev->events) |
803 | return -EINVAL; | 806 | return -EINVAL; |
@@ -805,19 +808,21 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
805 | /* if adding to array with a bitmap, then we can accept an | 808 | /* if adding to array with a bitmap, then we can accept an |
806 | * older device ... but not too old. | 809 | * older device ... but not too old. |
807 | */ | 810 | */ |
808 | __u64 ev1 = md_event(sb); | ||
809 | if (ev1 < mddev->bitmap->events_cleared) | 811 | if (ev1 < mddev->bitmap->events_cleared) |
810 | return 0; | 812 | return 0; |
811 | } else /* just a hot-add of a new device, leave raid_disk at -1 */ | 813 | } else { |
812 | return 0; | 814 | if (ev1 < mddev->events) |
815 | /* just a hot-add of a new device, leave raid_disk at -1 */ | ||
816 | return 0; | ||
817 | } | ||
813 | 818 | ||
814 | if (mddev->level != LEVEL_MULTIPATH) { | 819 | if (mddev->level != LEVEL_MULTIPATH) { |
815 | desc = sb->disks + rdev->desc_nr; | 820 | desc = sb->disks + rdev->desc_nr; |
816 | 821 | ||
817 | if (desc->state & (1<<MD_DISK_FAULTY)) | 822 | if (desc->state & (1<<MD_DISK_FAULTY)) |
818 | set_bit(Faulty, &rdev->flags); | 823 | set_bit(Faulty, &rdev->flags); |
819 | else if (desc->state & (1<<MD_DISK_SYNC) && | 824 | else if (desc->state & (1<<MD_DISK_SYNC) /* && |
820 | desc->raid_disk < mddev->raid_disks) { | 825 | desc->raid_disk < mddev->raid_disks */) { |
821 | set_bit(In_sync, &rdev->flags); | 826 | set_bit(In_sync, &rdev->flags); |
822 | rdev->raid_disk = desc->raid_disk; | 827 | rdev->raid_disk = desc->raid_disk; |
823 | } | 828 | } |
@@ -1057,6 +1062,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
1057 | if (rdev->sb_size & bmask) | 1062 | if (rdev->sb_size & bmask) |
1058 | rdev-> sb_size = (rdev->sb_size | bmask)+1; | 1063 | rdev-> sb_size = (rdev->sb_size | bmask)+1; |
1059 | 1064 | ||
1065 | if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) | ||
1066 | rdev->desc_nr = -1; | ||
1067 | else | ||
1068 | rdev->desc_nr = le32_to_cpu(sb->dev_number); | ||
1069 | |||
1060 | if (refdev == 0) | 1070 | if (refdev == 0) |
1061 | ret = 1; | 1071 | ret = 1; |
1062 | else { | 1072 | else { |
@@ -1100,6 +1110,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
1100 | static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | 1110 | static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) |
1101 | { | 1111 | { |
1102 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); | 1112 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); |
1113 | __u64 ev1 = le64_to_cpu(sb->events); | ||
1103 | 1114 | ||
1104 | rdev->raid_disk = -1; | 1115 | rdev->raid_disk = -1; |
1105 | rdev->flags = 0; | 1116 | rdev->flags = 0; |
@@ -1115,7 +1126,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1115 | mddev->layout = le32_to_cpu(sb->layout); | 1126 | mddev->layout = le32_to_cpu(sb->layout); |
1116 | mddev->raid_disks = le32_to_cpu(sb->raid_disks); | 1127 | mddev->raid_disks = le32_to_cpu(sb->raid_disks); |
1117 | mddev->size = le64_to_cpu(sb->size)/2; | 1128 | mddev->size = le64_to_cpu(sb->size)/2; |
1118 | mddev->events = le64_to_cpu(sb->events); | 1129 | mddev->events = ev1; |
1119 | mddev->bitmap_offset = 0; | 1130 | mddev->bitmap_offset = 0; |
1120 | mddev->default_bitmap_offset = 1024 >> 9; | 1131 | mddev->default_bitmap_offset = 1024 >> 9; |
1121 | 1132 | ||
@@ -1149,7 +1160,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1149 | 1160 | ||
1150 | } else if (mddev->pers == NULL) { | 1161 | } else if (mddev->pers == NULL) { |
1151 | /* Insist on good event counter while assembling */ | 1162 | /* Insist on good event counter while assembling */ |
1152 | __u64 ev1 = le64_to_cpu(sb->events); | ||
1153 | ++ev1; | 1163 | ++ev1; |
1154 | if (ev1 < mddev->events) | 1164 | if (ev1 < mddev->events) |
1155 | return -EINVAL; | 1165 | return -EINVAL; |
@@ -1157,15 +1167,15 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1157 | /* If adding to array with a bitmap, then we can accept an | 1167 | /* If adding to array with a bitmap, then we can accept an |
1158 | * older device, but not too old. | 1168 | * older device, but not too old. |
1159 | */ | 1169 | */ |
1160 | __u64 ev1 = le64_to_cpu(sb->events); | ||
1161 | if (ev1 < mddev->bitmap->events_cleared) | 1170 | if (ev1 < mddev->bitmap->events_cleared) |
1162 | return 0; | 1171 | return 0; |
1163 | } else /* just a hot-add of a new device, leave raid_disk at -1 */ | 1172 | } else { |
1164 | return 0; | 1173 | if (ev1 < mddev->events) |
1165 | 1174 | /* just a hot-add of a new device, leave raid_disk at -1 */ | |
1175 | return 0; | ||
1176 | } | ||
1166 | if (mddev->level != LEVEL_MULTIPATH) { | 1177 | if (mddev->level != LEVEL_MULTIPATH) { |
1167 | int role; | 1178 | int role; |
1168 | rdev->desc_nr = le32_to_cpu(sb->dev_number); | ||
1169 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | 1179 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); |
1170 | switch(role) { | 1180 | switch(role) { |
1171 | case 0xffff: /* spare */ | 1181 | case 0xffff: /* spare */ |
@@ -1174,7 +1184,11 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1174 | set_bit(Faulty, &rdev->flags); | 1184 | set_bit(Faulty, &rdev->flags); |
1175 | break; | 1185 | break; |
1176 | default: | 1186 | default: |
1177 | set_bit(In_sync, &rdev->flags); | 1187 | if ((le32_to_cpu(sb->feature_map) & |
1188 | MD_FEATURE_RECOVERY_OFFSET)) | ||
1189 | rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); | ||
1190 | else | ||
1191 | set_bit(In_sync, &rdev->flags); | ||
1178 | rdev->raid_disk = role; | 1192 | rdev->raid_disk = role; |
1179 | break; | 1193 | break; |
1180 | } | 1194 | } |
@@ -1198,6 +1212,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1198 | 1212 | ||
1199 | sb->feature_map = 0; | 1213 | sb->feature_map = 0; |
1200 | sb->pad0 = 0; | 1214 | sb->pad0 = 0; |
1215 | sb->recovery_offset = cpu_to_le64(0); | ||
1201 | memset(sb->pad1, 0, sizeof(sb->pad1)); | 1216 | memset(sb->pad1, 0, sizeof(sb->pad1)); |
1202 | memset(sb->pad2, 0, sizeof(sb->pad2)); | 1217 | memset(sb->pad2, 0, sizeof(sb->pad2)); |
1203 | memset(sb->pad3, 0, sizeof(sb->pad3)); | 1218 | memset(sb->pad3, 0, sizeof(sb->pad3)); |
@@ -1218,6 +1233,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1218 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); | 1233 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); |
1219 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); | 1234 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); |
1220 | } | 1235 | } |
1236 | |||
1237 | if (rdev->raid_disk >= 0 && | ||
1238 | !test_bit(In_sync, &rdev->flags) && | ||
1239 | rdev->recovery_offset > 0) { | ||
1240 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); | ||
1241 | sb->recovery_offset = cpu_to_le64(rdev->recovery_offset); | ||
1242 | } | ||
1243 | |||
1221 | if (mddev->reshape_position != MaxSector) { | 1244 | if (mddev->reshape_position != MaxSector) { |
1222 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); | 1245 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); |
1223 | sb->reshape_position = cpu_to_le64(mddev->reshape_position); | 1246 | sb->reshape_position = cpu_to_le64(mddev->reshape_position); |
@@ -1242,11 +1265,12 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1242 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1265 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1243 | else if (test_bit(In_sync, &rdev2->flags)) | 1266 | else if (test_bit(In_sync, &rdev2->flags)) |
1244 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); | 1267 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); |
1268 | else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0) | ||
1269 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); | ||
1245 | else | 1270 | else |
1246 | sb->dev_roles[i] = cpu_to_le16(0xffff); | 1271 | sb->dev_roles[i] = cpu_to_le16(0xffff); |
1247 | } | 1272 | } |
1248 | 1273 | ||
1249 | sb->recovery_offset = cpu_to_le64(0); /* not supported yet */ | ||
1250 | sb->sb_csum = calc_sb_1_csum(sb); | 1274 | sb->sb_csum = calc_sb_1_csum(sb); |
1251 | } | 1275 | } |
1252 | 1276 | ||
@@ -1384,7 +1408,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) | |||
1384 | struct block_device *bdev; | 1408 | struct block_device *bdev; |
1385 | char b[BDEVNAME_SIZE]; | 1409 | char b[BDEVNAME_SIZE]; |
1386 | 1410 | ||
1387 | bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); | 1411 | bdev = open_partition_by_devnum(dev, FMODE_READ|FMODE_WRITE); |
1388 | if (IS_ERR(bdev)) { | 1412 | if (IS_ERR(bdev)) { |
1389 | printk(KERN_ERR "md: could not open %s.\n", | 1413 | printk(KERN_ERR "md: could not open %s.\n", |
1390 | __bdevname(dev, b)); | 1414 | __bdevname(dev, b)); |
@@ -1394,7 +1418,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) | |||
1394 | if (err) { | 1418 | if (err) { |
1395 | printk(KERN_ERR "md: could not bd_claim %s.\n", | 1419 | printk(KERN_ERR "md: could not bd_claim %s.\n", |
1396 | bdevname(bdev, b)); | 1420 | bdevname(bdev, b)); |
1397 | blkdev_put(bdev); | 1421 | blkdev_put_partition(bdev); |
1398 | return err; | 1422 | return err; |
1399 | } | 1423 | } |
1400 | rdev->bdev = bdev; | 1424 | rdev->bdev = bdev; |
@@ -1408,7 +1432,7 @@ static void unlock_rdev(mdk_rdev_t *rdev) | |||
1408 | if (!bdev) | 1432 | if (!bdev) |
1409 | MD_BUG(); | 1433 | MD_BUG(); |
1410 | bd_release(bdev); | 1434 | bd_release(bdev); |
1411 | blkdev_put(bdev); | 1435 | blkdev_put_partition(bdev); |
1412 | } | 1436 | } |
1413 | 1437 | ||
1414 | void md_autodetect_dev(dev_t dev); | 1438 | void md_autodetect_dev(dev_t dev); |
@@ -1507,7 +1531,7 @@ static void print_rdev(mdk_rdev_t *rdev) | |||
1507 | printk(KERN_INFO "md: no rdev superblock!\n"); | 1531 | printk(KERN_INFO "md: no rdev superblock!\n"); |
1508 | } | 1532 | } |
1509 | 1533 | ||
1510 | void md_print_devices(void) | 1534 | static void md_print_devices(void) |
1511 | { | 1535 | { |
1512 | struct list_head *tmp, *tmp2; | 1536 | struct list_head *tmp, *tmp2; |
1513 | mdk_rdev_t *rdev; | 1537 | mdk_rdev_t *rdev; |
@@ -1536,15 +1560,30 @@ void md_print_devices(void) | |||
1536 | } | 1560 | } |
1537 | 1561 | ||
1538 | 1562 | ||
1539 | static void sync_sbs(mddev_t * mddev) | 1563 | static void sync_sbs(mddev_t * mddev, int nospares) |
1540 | { | 1564 | { |
1565 | /* Update each superblock (in-memory image), but | ||
1566 | * if we are allowed to, skip spares which already | ||
1567 | * have the right event counter, or have one earlier | ||
1568 | * (which would mean they aren't being marked as dirty | ||
1569 | * with the rest of the array) | ||
1570 | */ | ||
1541 | mdk_rdev_t *rdev; | 1571 | mdk_rdev_t *rdev; |
1542 | struct list_head *tmp; | 1572 | struct list_head *tmp; |
1543 | 1573 | ||
1544 | ITERATE_RDEV(mddev,rdev,tmp) { | 1574 | ITERATE_RDEV(mddev,rdev,tmp) { |
1545 | super_types[mddev->major_version]. | 1575 | if (rdev->sb_events == mddev->events || |
1546 | sync_super(mddev, rdev); | 1576 | (nospares && |
1547 | rdev->sb_loaded = 1; | 1577 | rdev->raid_disk < 0 && |
1578 | (rdev->sb_events&1)==0 && | ||
1579 | rdev->sb_events+1 == mddev->events)) { | ||
1580 | /* Don't update this superblock */ | ||
1581 | rdev->sb_loaded = 2; | ||
1582 | } else { | ||
1583 | super_types[mddev->major_version]. | ||
1584 | sync_super(mddev, rdev); | ||
1585 | rdev->sb_loaded = 1; | ||
1586 | } | ||
1548 | } | 1587 | } |
1549 | } | 1588 | } |
1550 | 1589 | ||
@@ -1554,12 +1593,55 @@ void md_update_sb(mddev_t * mddev) | |||
1554 | struct list_head *tmp; | 1593 | struct list_head *tmp; |
1555 | mdk_rdev_t *rdev; | 1594 | mdk_rdev_t *rdev; |
1556 | int sync_req; | 1595 | int sync_req; |
1596 | int nospares = 0; | ||
1557 | 1597 | ||
1558 | repeat: | 1598 | repeat: |
1559 | spin_lock_irq(&mddev->write_lock); | 1599 | spin_lock_irq(&mddev->write_lock); |
1600 | |||
1601 | if (mddev->degraded && mddev->sb_dirty == 3) | ||
1602 | /* If the array is degraded, then skipping spares is both | ||
1603 | * dangerous and fairly pointless. | ||
1604 | * Dangerous because a device that was removed from the array | ||
1605 | * might have an event_count that still looks up-to-date, | ||
1606 | * so it can be re-added without a resync. | ||
1607 | * Pointless because if there are any spares to skip, | ||
1608 | * then a recovery will happen and soon that array won't | ||
1609 | * be degraded any more and the spare can go back to sleep then. | ||
1610 | */ | ||
1611 | mddev->sb_dirty = 1; | ||
1612 | |||
1560 | sync_req = mddev->in_sync; | 1613 | sync_req = mddev->in_sync; |
1561 | mddev->utime = get_seconds(); | 1614 | mddev->utime = get_seconds(); |
1562 | mddev->events ++; | 1615 | if (mddev->sb_dirty == 3) |
1616 | /* just a clean<-> dirty transition, possibly leave spares alone, | ||
1617 | * though if events isn't the right even/odd, we will have to do | ||
1618 | * spares after all | ||
1619 | */ | ||
1620 | nospares = 1; | ||
1621 | |||
1622 | /* If this is just a dirty<->clean transition, and the array is clean | ||
1623 | * and 'events' is odd, we can roll back to the previous clean state */ | ||
1624 | if (mddev->sb_dirty == 3 | ||
1625 | && (mddev->in_sync && mddev->recovery_cp == MaxSector) | ||
1626 | && (mddev->events & 1)) | ||
1627 | mddev->events--; | ||
1628 | else { | ||
1629 | /* otherwise we have to go forward and ... */ | ||
1630 | mddev->events ++; | ||
1631 | if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ | ||
1632 | /* .. if the array isn't clean, insist on an odd 'events' */ | ||
1633 | if ((mddev->events&1)==0) { | ||
1634 | mddev->events++; | ||
1635 | nospares = 0; | ||
1636 | } | ||
1637 | } else { | ||
1638 | /* otherwise insist on an even 'events' (for clean states) */ | ||
1639 | if ((mddev->events&1)) { | ||
1640 | mddev->events++; | ||
1641 | nospares = 0; | ||
1642 | } | ||
1643 | } | ||
1644 | } | ||
1563 | 1645 | ||
1564 | if (!mddev->events) { | 1646 | if (!mddev->events) { |
1565 | /* | 1647 | /* |
@@ -1571,7 +1653,7 @@ repeat: | |||
1571 | mddev->events --; | 1653 | mddev->events --; |
1572 | } | 1654 | } |
1573 | mddev->sb_dirty = 2; | 1655 | mddev->sb_dirty = 2; |
1574 | sync_sbs(mddev); | 1656 | sync_sbs(mddev, nospares); |
1575 | 1657 | ||
1576 | /* | 1658 | /* |
1577 | * do not write anything to disk if using | 1659 | * do not write anything to disk if using |
@@ -1593,6 +1675,8 @@ repeat: | |||
1593 | ITERATE_RDEV(mddev,rdev,tmp) { | 1675 | ITERATE_RDEV(mddev,rdev,tmp) { |
1594 | char b[BDEVNAME_SIZE]; | 1676 | char b[BDEVNAME_SIZE]; |
1595 | dprintk(KERN_INFO "md: "); | 1677 | dprintk(KERN_INFO "md: "); |
1678 | if (rdev->sb_loaded != 1) | ||
1679 | continue; /* no noise on spare devices */ | ||
1596 | if (test_bit(Faulty, &rdev->flags)) | 1680 | if (test_bit(Faulty, &rdev->flags)) |
1597 | dprintk("(skipping faulty "); | 1681 | dprintk("(skipping faulty "); |
1598 | 1682 | ||
@@ -1604,6 +1688,7 @@ repeat: | |||
1604 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", | 1688 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", |
1605 | bdevname(rdev->bdev,b), | 1689 | bdevname(rdev->bdev,b), |
1606 | (unsigned long long)rdev->sb_offset); | 1690 | (unsigned long long)rdev->sb_offset); |
1691 | rdev->sb_events = mddev->events; | ||
1607 | 1692 | ||
1608 | } else | 1693 | } else |
1609 | dprintk(")\n"); | 1694 | dprintk(")\n"); |
@@ -1667,6 +1752,10 @@ state_show(mdk_rdev_t *rdev, char *page) | |||
1667 | len += sprintf(page+len, "%sin_sync",sep); | 1752 | len += sprintf(page+len, "%sin_sync",sep); |
1668 | sep = ","; | 1753 | sep = ","; |
1669 | } | 1754 | } |
1755 | if (test_bit(WriteMostly, &rdev->flags)) { | ||
1756 | len += sprintf(page+len, "%swrite_mostly",sep); | ||
1757 | sep = ","; | ||
1758 | } | ||
1670 | if (!test_bit(Faulty, &rdev->flags) && | 1759 | if (!test_bit(Faulty, &rdev->flags) && |
1671 | !test_bit(In_sync, &rdev->flags)) { | 1760 | !test_bit(In_sync, &rdev->flags)) { |
1672 | len += sprintf(page+len, "%sspare", sep); | 1761 | len += sprintf(page+len, "%sspare", sep); |
@@ -1675,8 +1764,40 @@ state_show(mdk_rdev_t *rdev, char *page) | |||
1675 | return len+sprintf(page+len, "\n"); | 1764 | return len+sprintf(page+len, "\n"); |
1676 | } | 1765 | } |
1677 | 1766 | ||
1678 | static struct rdev_sysfs_entry | 1767 | static ssize_t |
1679 | rdev_state = __ATTR_RO(state); | 1768 | state_store(mdk_rdev_t *rdev, const char *buf, size_t len) |
1769 | { | ||
1770 | /* can write | ||
1771 | * faulty - simulates an error | ||
1772 | * remove - disconnects the device | ||
1773 | * writemostly - sets write_mostly | ||
1774 | * -writemostly - clears write_mostly | ||
1775 | */ | ||
1776 | int err = -EINVAL; | ||
1777 | if (cmd_match(buf, "faulty") && rdev->mddev->pers) { | ||
1778 | md_error(rdev->mddev, rdev); | ||
1779 | err = 0; | ||
1780 | } else if (cmd_match(buf, "remove")) { | ||
1781 | if (rdev->raid_disk >= 0) | ||
1782 | err = -EBUSY; | ||
1783 | else { | ||
1784 | mddev_t *mddev = rdev->mddev; | ||
1785 | kick_rdev_from_array(rdev); | ||
1786 | md_update_sb(mddev); | ||
1787 | md_new_event(mddev); | ||
1788 | err = 0; | ||
1789 | } | ||
1790 | } else if (cmd_match(buf, "writemostly")) { | ||
1791 | set_bit(WriteMostly, &rdev->flags); | ||
1792 | err = 0; | ||
1793 | } else if (cmd_match(buf, "-writemostly")) { | ||
1794 | clear_bit(WriteMostly, &rdev->flags); | ||
1795 | err = 0; | ||
1796 | } | ||
1797 | return err ? err : len; | ||
1798 | } | ||
1799 | static struct rdev_sysfs_entry rdev_state = | ||
1800 | __ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store); | ||
1680 | 1801 | ||
1681 | static ssize_t | 1802 | static ssize_t |
1682 | super_show(mdk_rdev_t *rdev, char *page) | 1803 | super_show(mdk_rdev_t *rdev, char *page) |
@@ -1707,7 +1828,7 @@ errors_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1707 | return -EINVAL; | 1828 | return -EINVAL; |
1708 | } | 1829 | } |
1709 | static struct rdev_sysfs_entry rdev_errors = | 1830 | static struct rdev_sysfs_entry rdev_errors = |
1710 | __ATTR(errors, 0644, errors_show, errors_store); | 1831 | __ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store); |
1711 | 1832 | ||
1712 | static ssize_t | 1833 | static ssize_t |
1713 | slot_show(mdk_rdev_t *rdev, char *page) | 1834 | slot_show(mdk_rdev_t *rdev, char *page) |
@@ -1741,7 +1862,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1741 | 1862 | ||
1742 | 1863 | ||
1743 | static struct rdev_sysfs_entry rdev_slot = | 1864 | static struct rdev_sysfs_entry rdev_slot = |
1744 | __ATTR(slot, 0644, slot_show, slot_store); | 1865 | __ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store); |
1745 | 1866 | ||
1746 | static ssize_t | 1867 | static ssize_t |
1747 | offset_show(mdk_rdev_t *rdev, char *page) | 1868 | offset_show(mdk_rdev_t *rdev, char *page) |
@@ -1763,7 +1884,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1763 | } | 1884 | } |
1764 | 1885 | ||
1765 | static struct rdev_sysfs_entry rdev_offset = | 1886 | static struct rdev_sysfs_entry rdev_offset = |
1766 | __ATTR(offset, 0644, offset_show, offset_store); | 1887 | __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store); |
1767 | 1888 | ||
1768 | static ssize_t | 1889 | static ssize_t |
1769 | rdev_size_show(mdk_rdev_t *rdev, char *page) | 1890 | rdev_size_show(mdk_rdev_t *rdev, char *page) |
@@ -1787,7 +1908,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1787 | } | 1908 | } |
1788 | 1909 | ||
1789 | static struct rdev_sysfs_entry rdev_size = | 1910 | static struct rdev_sysfs_entry rdev_size = |
1790 | __ATTR(size, 0644, rdev_size_show, rdev_size_store); | 1911 | __ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store); |
1791 | 1912 | ||
1792 | static struct attribute *rdev_default_attrs[] = { | 1913 | static struct attribute *rdev_default_attrs[] = { |
1793 | &rdev_state.attr, | 1914 | &rdev_state.attr, |
@@ -1818,6 +1939,8 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, | |||
1818 | 1939 | ||
1819 | if (!entry->store) | 1940 | if (!entry->store) |
1820 | return -EIO; | 1941 | return -EIO; |
1942 | if (!capable(CAP_SYS_ADMIN)) | ||
1943 | return -EACCES; | ||
1821 | return entry->store(rdev, page, length); | 1944 | return entry->store(rdev, page, length); |
1822 | } | 1945 | } |
1823 | 1946 | ||
@@ -1873,6 +1996,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
1873 | rdev->desc_nr = -1; | 1996 | rdev->desc_nr = -1; |
1874 | rdev->flags = 0; | 1997 | rdev->flags = 0; |
1875 | rdev->data_offset = 0; | 1998 | rdev->data_offset = 0; |
1999 | rdev->sb_events = 0; | ||
1876 | atomic_set(&rdev->nr_pending, 0); | 2000 | atomic_set(&rdev->nr_pending, 0); |
1877 | atomic_set(&rdev->read_errors, 0); | 2001 | atomic_set(&rdev->read_errors, 0); |
1878 | atomic_set(&rdev->corrected_errors, 0); | 2002 | atomic_set(&rdev->corrected_errors, 0); |
@@ -1978,6 +2102,54 @@ static void analyze_sbs(mddev_t * mddev) | |||
1978 | } | 2102 | } |
1979 | 2103 | ||
1980 | static ssize_t | 2104 | static ssize_t |
2105 | safe_delay_show(mddev_t *mddev, char *page) | ||
2106 | { | ||
2107 | int msec = (mddev->safemode_delay*1000)/HZ; | ||
2108 | return sprintf(page, "%d.%03d\n", msec/1000, msec%1000); | ||
2109 | } | ||
2110 | static ssize_t | ||
2111 | safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) | ||
2112 | { | ||
2113 | int scale=1; | ||
2114 | int dot=0; | ||
2115 | int i; | ||
2116 | unsigned long msec; | ||
2117 | char buf[30]; | ||
2118 | char *e; | ||
2119 | /* remove a period, and count digits after it */ | ||
2120 | if (len >= sizeof(buf)) | ||
2121 | return -EINVAL; | ||
2122 | strlcpy(buf, cbuf, len); | ||
2123 | buf[len] = 0; | ||
2124 | for (i=0; i<len; i++) { | ||
2125 | if (dot) { | ||
2126 | if (isdigit(buf[i])) { | ||
2127 | buf[i-1] = buf[i]; | ||
2128 | scale *= 10; | ||
2129 | } | ||
2130 | buf[i] = 0; | ||
2131 | } else if (buf[i] == '.') { | ||
2132 | dot=1; | ||
2133 | buf[i] = 0; | ||
2134 | } | ||
2135 | } | ||
2136 | msec = simple_strtoul(buf, &e, 10); | ||
2137 | if (e == buf || (*e && *e != '\n')) | ||
2138 | return -EINVAL; | ||
2139 | msec = (msec * 1000) / scale; | ||
2140 | if (msec == 0) | ||
2141 | mddev->safemode_delay = 0; | ||
2142 | else { | ||
2143 | mddev->safemode_delay = (msec*HZ)/1000; | ||
2144 | if (mddev->safemode_delay == 0) | ||
2145 | mddev->safemode_delay = 1; | ||
2146 | } | ||
2147 | return len; | ||
2148 | } | ||
2149 | static struct md_sysfs_entry md_safe_delay = | ||
2150 | __ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store); | ||
2151 | |||
2152 | static ssize_t | ||
1981 | level_show(mddev_t *mddev, char *page) | 2153 | level_show(mddev_t *mddev, char *page) |
1982 | { | 2154 | { |
1983 | struct mdk_personality *p = mddev->pers; | 2155 | struct mdk_personality *p = mddev->pers; |
@@ -2010,7 +2182,33 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
2010 | } | 2182 | } |
2011 | 2183 | ||
2012 | static struct md_sysfs_entry md_level = | 2184 | static struct md_sysfs_entry md_level = |
2013 | __ATTR(level, 0644, level_show, level_store); | 2185 | __ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store); |
2186 | |||
2187 | |||
2188 | static ssize_t | ||
2189 | layout_show(mddev_t *mddev, char *page) | ||
2190 | { | ||
2191 | /* just a number, not meaningful for all levels */ | ||
2192 | return sprintf(page, "%d\n", mddev->layout); | ||
2193 | } | ||
2194 | |||
2195 | static ssize_t | ||
2196 | layout_store(mddev_t *mddev, const char *buf, size_t len) | ||
2197 | { | ||
2198 | char *e; | ||
2199 | unsigned long n = simple_strtoul(buf, &e, 10); | ||
2200 | if (mddev->pers) | ||
2201 | return -EBUSY; | ||
2202 | |||
2203 | if (!*buf || (*e && *e != '\n')) | ||
2204 | return -EINVAL; | ||
2205 | |||
2206 | mddev->layout = n; | ||
2207 | return len; | ||
2208 | } | ||
2209 | static struct md_sysfs_entry md_layout = | ||
2210 | __ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store); | ||
2211 | |||
2014 | 2212 | ||
2015 | static ssize_t | 2213 | static ssize_t |
2016 | raid_disks_show(mddev_t *mddev, char *page) | 2214 | raid_disks_show(mddev_t *mddev, char *page) |
@@ -2040,7 +2238,7 @@ raid_disks_store(mddev_t *mddev, const char *buf, size_t len) | |||
2040 | return rv ? rv : len; | 2238 | return rv ? rv : len; |
2041 | } | 2239 | } |
2042 | static struct md_sysfs_entry md_raid_disks = | 2240 | static struct md_sysfs_entry md_raid_disks = |
2043 | __ATTR(raid_disks, 0644, raid_disks_show, raid_disks_store); | 2241 | __ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store); |
2044 | 2242 | ||
2045 | static ssize_t | 2243 | static ssize_t |
2046 | chunk_size_show(mddev_t *mddev, char *page) | 2244 | chunk_size_show(mddev_t *mddev, char *page) |
@@ -2064,7 +2262,202 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) | |||
2064 | return len; | 2262 | return len; |
2065 | } | 2263 | } |
2066 | static struct md_sysfs_entry md_chunk_size = | 2264 | static struct md_sysfs_entry md_chunk_size = |
2067 | __ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store); | 2265 | __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store); |
2266 | |||
2267 | static ssize_t | ||
2268 | resync_start_show(mddev_t *mddev, char *page) | ||
2269 | { | ||
2270 | return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp); | ||
2271 | } | ||
2272 | |||
2273 | static ssize_t | ||
2274 | resync_start_store(mddev_t *mddev, const char *buf, size_t len) | ||
2275 | { | ||
2276 | /* can only set resync_start if array is not yet active */ | ||
2277 | char *e; | ||
2278 | unsigned long long n = simple_strtoull(buf, &e, 10); | ||
2279 | |||
2280 | if (mddev->pers) | ||
2281 | return -EBUSY; | ||
2282 | if (!*buf || (*e && *e != '\n')) | ||
2283 | return -EINVAL; | ||
2284 | |||
2285 | mddev->recovery_cp = n; | ||
2286 | return len; | ||
2287 | } | ||
2288 | static struct md_sysfs_entry md_resync_start = | ||
2289 | __ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store); | ||
2290 | |||
2291 | /* | ||
2292 | * The array state can be: | ||
2293 | * | ||
2294 | * clear | ||
2295 | * No devices, no size, no level | ||
2296 | * Equivalent to STOP_ARRAY ioctl | ||
2297 | * inactive | ||
2298 | * May have some settings, but array is not active | ||
2299 | * all IO results in error | ||
2300 | * When written, doesn't tear down array, but just stops it | ||
2301 | * suspended (not supported yet) | ||
2302 | * All IO requests will block. The array can be reconfigured. | ||
2303 | * Writing this, if accepted, will block until array is quiescent | ||
2304 | * readonly | ||
2305 | * no resync can happen. no superblocks get written. | ||
2306 | * write requests fail | ||
2307 | * read-auto | ||
2308 | * like readonly, but behaves like 'clean' on a write request. | ||
2309 | * | ||
2310 | * clean - no pending writes, but otherwise active. | ||
2311 | * When written to inactive array, starts without resync | ||
2312 | * If a write request arrives then | ||
2313 | * if metadata is known, mark 'dirty' and switch to 'active'. | ||
2314 | * if not known, block and switch to write-pending | ||
2315 | * If written to an active array that has pending writes, then fails. | ||
2316 | * active | ||
2317 | * fully active: IO and resync can be happening. | ||
2318 | * When written to inactive array, starts with resync | ||
2319 | * | ||
2320 | * write-pending | ||
2321 | * clean, but writes are blocked waiting for 'active' to be written. | ||
2322 | * | ||
2323 | * active-idle | ||
2324 | * like active, but no writes have been seen for a while (safemode_delay, 200msec by default). | ||
2325 | * | ||
2326 | */ | ||
2327 | enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active, | ||
2328 | write_pending, active_idle, bad_word}; | ||
2329 | static char *array_states[] = { | ||
2330 | "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active", | ||
2331 | "write-pending", "active-idle", NULL }; | ||
2332 | |||
2333 | static int match_word(const char *word, char **list) | ||
2334 | { | ||
2335 | int n; | ||
2336 | for (n=0; list[n]; n++) | ||
2337 | if (cmd_match(word, list[n])) | ||
2338 | break; | ||
2339 | return n; | ||
2340 | } | ||
2341 | |||
2342 | static ssize_t | ||
2343 | array_state_show(mddev_t *mddev, char *page) | ||
2344 | { | ||
2345 | enum array_state st = inactive; | ||
2346 | |||
2347 | if (mddev->pers) | ||
2348 | switch(mddev->ro) { | ||
2349 | case 1: | ||
2350 | st = readonly; | ||
2351 | break; | ||
2352 | case 2: | ||
2353 | st = read_auto; | ||
2354 | break; | ||
2355 | case 0: | ||
2356 | if (mddev->in_sync) | ||
2357 | st = clean; | ||
2358 | else if (mddev->safemode) | ||
2359 | st = active_idle; | ||
2360 | else | ||
2361 | st = active; | ||
2362 | } | ||
2363 | else { | ||
2364 | if (list_empty(&mddev->disks) && | ||
2365 | mddev->raid_disks == 0 && | ||
2366 | mddev->size == 0) | ||
2367 | st = clear; | ||
2368 | else | ||
2369 | st = inactive; | ||
2370 | } | ||
2371 | return sprintf(page, "%s\n", array_states[st]); | ||
2372 | } | ||
2373 | |||
2374 | static int do_md_stop(mddev_t * mddev, int ro); | ||
2375 | static int do_md_run(mddev_t * mddev); | ||
2376 | static int restart_array(mddev_t *mddev); | ||
2377 | |||
2378 | static ssize_t | ||
2379 | array_state_store(mddev_t *mddev, const char *buf, size_t len) | ||
2380 | { | ||
2381 | int err = -EINVAL; | ||
2382 | enum array_state st = match_word(buf, array_states); | ||
2383 | switch(st) { | ||
2384 | case bad_word: | ||
2385 | break; | ||
2386 | case clear: | ||
2387 | /* stopping an active array */ | ||
2388 | if (mddev->pers) { | ||
2389 | if (atomic_read(&mddev->active) > 1) | ||
2390 | return -EBUSY; | ||
2391 | err = do_md_stop(mddev, 0); | ||
2392 | } | ||
2393 | break; | ||
2394 | case inactive: | ||
2395 | /* stopping an active array */ | ||
2396 | if (mddev->pers) { | ||
2397 | if (atomic_read(&mddev->active) > 1) | ||
2398 | return -EBUSY; | ||
2399 | err = do_md_stop(mddev, 2); | ||
2400 | } | ||
2401 | break; | ||
2402 | case suspended: | ||
2403 | break; /* not supported yet */ | ||
2404 | case readonly: | ||
2405 | if (mddev->pers) | ||
2406 | err = do_md_stop(mddev, 1); | ||
2407 | else { | ||
2408 | mddev->ro = 1; | ||
2409 | err = do_md_run(mddev); | ||
2410 | } | ||
2411 | break; | ||
2412 | case read_auto: | ||
2413 | /* an active array is first switched to read-only */ | ||
2414 | if (mddev->pers) { | ||
2415 | err = do_md_stop(mddev, 1); | ||
2416 | if (err == 0) | ||
2417 | mddev->ro = 2; /* FIXME mark devices writable */ | ||
2418 | } else { | ||
2419 | mddev->ro = 2; | ||
2420 | err = do_md_run(mddev); | ||
2421 | } | ||
2422 | break; | ||
2423 | case clean: | ||
2424 | if (mddev->pers) { | ||
2425 | restart_array(mddev); | ||
2426 | spin_lock_irq(&mddev->write_lock); | ||
2427 | if (atomic_read(&mddev->writes_pending) == 0) { | ||
2428 | mddev->in_sync = 1; | ||
2429 | mddev->sb_dirty = 1; | ||
2430 | } | ||
2431 | spin_unlock_irq(&mddev->write_lock); | ||
2432 | } else { | ||
2433 | mddev->ro = 0; | ||
2434 | mddev->recovery_cp = MaxSector; | ||
2435 | err = do_md_run(mddev); | ||
2436 | } | ||
2437 | break; | ||
2438 | case active: | ||
2439 | if (mddev->pers) { | ||
2440 | restart_array(mddev); | ||
2441 | mddev->sb_dirty = 0; | ||
2442 | wake_up(&mddev->sb_wait); | ||
2443 | err = 0; | ||
2444 | } else { | ||
2445 | mddev->ro = 0; | ||
2446 | err = do_md_run(mddev); | ||
2447 | } | ||
2448 | break; | ||
2449 | case write_pending: | ||
2450 | case active_idle: | ||
2451 | /* these cannot be set */ | ||
2452 | break; | ||
2453 | } | ||
2454 | if (err) | ||
2455 | return err; | ||
2456 | else | ||
2457 | return len; | ||
2458 | } | ||
2459 | static struct md_sysfs_entry md_array_state = | ||
2460 | __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); | ||
2068 | 2461 | ||
2069 | static ssize_t | 2462 | static ssize_t |
2070 | null_show(mddev_t *mddev, char *page) | 2463 | null_show(mddev_t *mddev, char *page) |
@@ -2124,7 +2517,7 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len) | |||
2124 | } | 2517 | } |
2125 | 2518 | ||
2126 | static struct md_sysfs_entry md_new_device = | 2519 | static struct md_sysfs_entry md_new_device = |
2127 | __ATTR(new_dev, 0200, null_show, new_dev_store); | 2520 | __ATTR(new_dev, S_IWUSR, null_show, new_dev_store); |
2128 | 2521 | ||
2129 | static ssize_t | 2522 | static ssize_t |
2130 | size_show(mddev_t *mddev, char *page) | 2523 | size_show(mddev_t *mddev, char *page) |
@@ -2162,7 +2555,7 @@ size_store(mddev_t *mddev, const char *buf, size_t len) | |||
2162 | } | 2555 | } |
2163 | 2556 | ||
2164 | static struct md_sysfs_entry md_size = | 2557 | static struct md_sysfs_entry md_size = |
2165 | __ATTR(component_size, 0644, size_show, size_store); | 2558 | __ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store); |
2166 | 2559 | ||
2167 | 2560 | ||
2168 | /* Metadata version. | 2561 | /* Metadata version. |
@@ -2210,7 +2603,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) | |||
2210 | } | 2603 | } |
2211 | 2604 | ||
2212 | static struct md_sysfs_entry md_metadata = | 2605 | static struct md_sysfs_entry md_metadata = |
2213 | __ATTR(metadata_version, 0644, metadata_show, metadata_store); | 2606 | __ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store); |
2214 | 2607 | ||
2215 | static ssize_t | 2608 | static ssize_t |
2216 | action_show(mddev_t *mddev, char *page) | 2609 | action_show(mddev_t *mddev, char *page) |
@@ -2278,12 +2671,11 @@ mismatch_cnt_show(mddev_t *mddev, char *page) | |||
2278 | (unsigned long long) mddev->resync_mismatches); | 2671 | (unsigned long long) mddev->resync_mismatches); |
2279 | } | 2672 | } |
2280 | 2673 | ||
2281 | static struct md_sysfs_entry | 2674 | static struct md_sysfs_entry md_scan_mode = |
2282 | md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store); | 2675 | __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store); |
2283 | 2676 | ||
2284 | 2677 | ||
2285 | static struct md_sysfs_entry | 2678 | static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt); |
2286 | md_mismatches = __ATTR_RO(mismatch_cnt); | ||
2287 | 2679 | ||
2288 | static ssize_t | 2680 | static ssize_t |
2289 | sync_min_show(mddev_t *mddev, char *page) | 2681 | sync_min_show(mddev_t *mddev, char *page) |
@@ -2342,15 +2734,14 @@ static ssize_t | |||
2342 | sync_speed_show(mddev_t *mddev, char *page) | 2734 | sync_speed_show(mddev_t *mddev, char *page) |
2343 | { | 2735 | { |
2344 | unsigned long resync, dt, db; | 2736 | unsigned long resync, dt, db; |
2345 | resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); | 2737 | resync = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)); |
2346 | dt = ((jiffies - mddev->resync_mark) / HZ); | 2738 | dt = ((jiffies - mddev->resync_mark) / HZ); |
2347 | if (!dt) dt++; | 2739 | if (!dt) dt++; |
2348 | db = resync - (mddev->resync_mark_cnt); | 2740 | db = resync - (mddev->resync_mark_cnt); |
2349 | return sprintf(page, "%ld\n", db/dt/2); /* K/sec */ | 2741 | return sprintf(page, "%ld\n", db/dt/2); /* K/sec */ |
2350 | } | 2742 | } |
2351 | 2743 | ||
2352 | static struct md_sysfs_entry | 2744 | static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed); |
2353 | md_sync_speed = __ATTR_RO(sync_speed); | ||
2354 | 2745 | ||
2355 | static ssize_t | 2746 | static ssize_t |
2356 | sync_completed_show(mddev_t *mddev, char *page) | 2747 | sync_completed_show(mddev_t *mddev, char *page) |
@@ -2366,8 +2757,7 @@ sync_completed_show(mddev_t *mddev, char *page) | |||
2366 | return sprintf(page, "%lu / %lu\n", resync, max_blocks); | 2757 | return sprintf(page, "%lu / %lu\n", resync, max_blocks); |
2367 | } | 2758 | } |
2368 | 2759 | ||
2369 | static struct md_sysfs_entry | 2760 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); |
2370 | md_sync_completed = __ATTR_RO(sync_completed); | ||
2371 | 2761 | ||
2372 | static ssize_t | 2762 | static ssize_t |
2373 | suspend_lo_show(mddev_t *mddev, char *page) | 2763 | suspend_lo_show(mddev_t *mddev, char *page) |
@@ -2428,11 +2818,15 @@ __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); | |||
2428 | 2818 | ||
2429 | static struct attribute *md_default_attrs[] = { | 2819 | static struct attribute *md_default_attrs[] = { |
2430 | &md_level.attr, | 2820 | &md_level.attr, |
2821 | &md_layout.attr, | ||
2431 | &md_raid_disks.attr, | 2822 | &md_raid_disks.attr, |
2432 | &md_chunk_size.attr, | 2823 | &md_chunk_size.attr, |
2433 | &md_size.attr, | 2824 | &md_size.attr, |
2825 | &md_resync_start.attr, | ||
2434 | &md_metadata.attr, | 2826 | &md_metadata.attr, |
2435 | &md_new_device.attr, | 2827 | &md_new_device.attr, |
2828 | &md_safe_delay.attr, | ||
2829 | &md_array_state.attr, | ||
2436 | NULL, | 2830 | NULL, |
2437 | }; | 2831 | }; |
2438 | 2832 | ||
@@ -2480,6 +2874,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, | |||
2480 | 2874 | ||
2481 | if (!entry->store) | 2875 | if (!entry->store) |
2482 | return -EIO; | 2876 | return -EIO; |
2877 | if (!capable(CAP_SYS_ADMIN)) | ||
2878 | return -EACCES; | ||
2483 | rv = mddev_lock(mddev); | 2879 | rv = mddev_lock(mddev); |
2484 | if (!rv) { | 2880 | if (!rv) { |
2485 | rv = entry->store(mddev, page, length); | 2881 | rv = entry->store(mddev, page, length); |
@@ -2532,13 +2928,10 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
2532 | } | 2928 | } |
2533 | disk->major = MAJOR(dev); | 2929 | disk->major = MAJOR(dev); |
2534 | disk->first_minor = unit << shift; | 2930 | disk->first_minor = unit << shift; |
2535 | if (partitioned) { | 2931 | if (partitioned) |
2536 | sprintf(disk->disk_name, "md_d%d", unit); | 2932 | sprintf(disk->disk_name, "md_d%d", unit); |
2537 | sprintf(disk->devfs_name, "md/d%d", unit); | 2933 | else |
2538 | } else { | ||
2539 | sprintf(disk->disk_name, "md%d", unit); | 2934 | sprintf(disk->disk_name, "md%d", unit); |
2540 | sprintf(disk->devfs_name, "md/%d", unit); | ||
2541 | } | ||
2542 | disk->fops = &md_fops; | 2935 | disk->fops = &md_fops; |
2543 | disk->private_data = mddev; | 2936 | disk->private_data = mddev; |
2544 | disk->queue = mddev->queue; | 2937 | disk->queue = mddev->queue; |
@@ -2553,8 +2946,6 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
2553 | return NULL; | 2946 | return NULL; |
2554 | } | 2947 | } |
2555 | 2948 | ||
2556 | void md_wakeup_thread(mdk_thread_t *thread); | ||
2557 | |||
2558 | static void md_safemode_timeout(unsigned long data) | 2949 | static void md_safemode_timeout(unsigned long data) |
2559 | { | 2950 | { |
2560 | mddev_t *mddev = (mddev_t *) data; | 2951 | mddev_t *mddev = (mddev_t *) data; |
@@ -2708,7 +3099,7 @@ static int do_md_run(mddev_t * mddev) | |||
2708 | mddev->safemode = 0; | 3099 | mddev->safemode = 0; |
2709 | mddev->safemode_timer.function = md_safemode_timeout; | 3100 | mddev->safemode_timer.function = md_safemode_timeout; |
2710 | mddev->safemode_timer.data = (unsigned long) mddev; | 3101 | mddev->safemode_timer.data = (unsigned long) mddev; |
2711 | mddev->safemode_delay = (20 * HZ)/1000 +1; /* 20 msec delay */ | 3102 | mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ |
2712 | mddev->in_sync = 1; | 3103 | mddev->in_sync = 1; |
2713 | 3104 | ||
2714 | ITERATE_RDEV(mddev,rdev,tmp) | 3105 | ITERATE_RDEV(mddev,rdev,tmp) |
@@ -2719,7 +3110,6 @@ static int do_md_run(mddev_t * mddev) | |||
2719 | } | 3110 | } |
2720 | 3111 | ||
2721 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3112 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
2722 | md_wakeup_thread(mddev->thread); | ||
2723 | 3113 | ||
2724 | if (mddev->sb_dirty) | 3114 | if (mddev->sb_dirty) |
2725 | md_update_sb(mddev); | 3115 | md_update_sb(mddev); |
@@ -2736,6 +3126,37 @@ static int do_md_run(mddev_t * mddev) | |||
2736 | mddev->queue->queuedata = mddev; | 3126 | mddev->queue->queuedata = mddev; |
2737 | mddev->queue->make_request_fn = mddev->pers->make_request; | 3127 | mddev->queue->make_request_fn = mddev->pers->make_request; |
2738 | 3128 | ||
3129 | /* If there is a partially-recovered drive we need to | ||
3130 | * start recovery here. If we leave it to md_check_recovery, | ||
3131 | * it will remove the drives and not do the right thing | ||
3132 | */ | ||
3133 | if (mddev->degraded && !mddev->sync_thread) { | ||
3134 | struct list_head *rtmp; | ||
3135 | int spares = 0; | ||
3136 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
3137 | if (rdev->raid_disk >= 0 && | ||
3138 | !test_bit(In_sync, &rdev->flags) && | ||
3139 | !test_bit(Faulty, &rdev->flags)) | ||
3140 | /* complete an interrupted recovery */ | ||
3141 | spares++; | ||
3142 | if (spares && mddev->pers->sync_request) { | ||
3143 | mddev->recovery = 0; | ||
3144 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
3145 | mddev->sync_thread = md_register_thread(md_do_sync, | ||
3146 | mddev, | ||
3147 | "%s_resync"); | ||
3148 | if (!mddev->sync_thread) { | ||
3149 | printk(KERN_ERR "%s: could not start resync" | ||
3150 | " thread...\n", | ||
3151 | mdname(mddev)); | ||
3152 | /* leave the spares where they are, it shouldn't hurt */ | ||
3153 | mddev->recovery = 0; | ||
3154 | } | ||
3155 | } | ||
3156 | } | ||
3157 | md_wakeup_thread(mddev->thread); | ||
3158 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | ||
3159 | |||
2739 | mddev->changed = 1; | 3160 | mddev->changed = 1; |
2740 | md_new_event(mddev); | 3161 | md_new_event(mddev); |
2741 | return 0; | 3162 | return 0; |
@@ -2769,18 +3190,47 @@ static int restart_array(mddev_t *mddev) | |||
2769 | */ | 3190 | */ |
2770 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3191 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
2771 | md_wakeup_thread(mddev->thread); | 3192 | md_wakeup_thread(mddev->thread); |
3193 | md_wakeup_thread(mddev->sync_thread); | ||
2772 | err = 0; | 3194 | err = 0; |
2773 | } else { | 3195 | } else |
2774 | printk(KERN_ERR "md: %s has no personality assigned.\n", | ||
2775 | mdname(mddev)); | ||
2776 | err = -EINVAL; | 3196 | err = -EINVAL; |
2777 | } | ||
2778 | 3197 | ||
2779 | out: | 3198 | out: |
2780 | return err; | 3199 | return err; |
2781 | } | 3200 | } |
2782 | 3201 | ||
2783 | static int do_md_stop(mddev_t * mddev, int ro) | 3202 | /* similar to deny_write_access, but accounts for our holding a reference |
3203 | * to the file ourselves */ | ||
3204 | static int deny_bitmap_write_access(struct file * file) | ||
3205 | { | ||
3206 | struct inode *inode = file->f_mapping->host; | ||
3207 | |||
3208 | spin_lock(&inode->i_lock); | ||
3209 | if (atomic_read(&inode->i_writecount) > 1) { | ||
3210 | spin_unlock(&inode->i_lock); | ||
3211 | return -ETXTBSY; | ||
3212 | } | ||
3213 | atomic_set(&inode->i_writecount, -1); | ||
3214 | spin_unlock(&inode->i_lock); | ||
3215 | |||
3216 | return 0; | ||
3217 | } | ||
3218 | |||
3219 | static void restore_bitmap_write_access(struct file *file) | ||
3220 | { | ||
3221 | struct inode *inode = file->f_mapping->host; | ||
3222 | |||
3223 | spin_lock(&inode->i_lock); | ||
3224 | atomic_set(&inode->i_writecount, 1); | ||
3225 | spin_unlock(&inode->i_lock); | ||
3226 | } | ||
3227 | |||
3228 | /* mode: | ||
3229 | * 0 - completely stop and dis-assemble array | ||
3230 | * 1 - switch to readonly | ||
3231 | * 2 - stop but do not disassemble array | ||
3232 | */ | ||
3233 | static int do_md_stop(mddev_t * mddev, int mode) | ||
2784 | { | 3234 | { |
2785 | int err = 0; | 3235 | int err = 0; |
2786 | struct gendisk *disk = mddev->gendisk; | 3236 | struct gendisk *disk = mddev->gendisk; |
@@ -2792,6 +3242,7 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2792 | } | 3242 | } |
2793 | 3243 | ||
2794 | if (mddev->sync_thread) { | 3244 | if (mddev->sync_thread) { |
3245 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
2795 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 3246 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
2796 | md_unregister_thread(mddev->sync_thread); | 3247 | md_unregister_thread(mddev->sync_thread); |
2797 | mddev->sync_thread = NULL; | 3248 | mddev->sync_thread = NULL; |
@@ -2801,12 +3252,15 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2801 | 3252 | ||
2802 | invalidate_partition(disk, 0); | 3253 | invalidate_partition(disk, 0); |
2803 | 3254 | ||
2804 | if (ro) { | 3255 | switch(mode) { |
3256 | case 1: /* readonly */ | ||
2805 | err = -ENXIO; | 3257 | err = -ENXIO; |
2806 | if (mddev->ro==1) | 3258 | if (mddev->ro==1) |
2807 | goto out; | 3259 | goto out; |
2808 | mddev->ro = 1; | 3260 | mddev->ro = 1; |
2809 | } else { | 3261 | break; |
3262 | case 0: /* disassemble */ | ||
3263 | case 2: /* stop */ | ||
2810 | bitmap_flush(mddev); | 3264 | bitmap_flush(mddev); |
2811 | md_super_wait(mddev); | 3265 | md_super_wait(mddev); |
2812 | if (mddev->ro) | 3266 | if (mddev->ro) |
@@ -2821,19 +3275,20 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2821 | if (mddev->ro) | 3275 | if (mddev->ro) |
2822 | mddev->ro = 0; | 3276 | mddev->ro = 0; |
2823 | } | 3277 | } |
2824 | if (!mddev->in_sync) { | 3278 | if (!mddev->in_sync || mddev->sb_dirty) { |
2825 | /* mark array as shutdown cleanly */ | 3279 | /* mark array as shutdown cleanly */ |
2826 | mddev->in_sync = 1; | 3280 | mddev->in_sync = 1; |
2827 | md_update_sb(mddev); | 3281 | md_update_sb(mddev); |
2828 | } | 3282 | } |
2829 | if (ro) | 3283 | if (mode == 1) |
2830 | set_disk_ro(disk, 1); | 3284 | set_disk_ro(disk, 1); |
3285 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
2831 | } | 3286 | } |
2832 | 3287 | ||
2833 | /* | 3288 | /* |
2834 | * Free resources if final stop | 3289 | * Free resources if final stop |
2835 | */ | 3290 | */ |
2836 | if (!ro) { | 3291 | if (mode == 0) { |
2837 | mdk_rdev_t *rdev; | 3292 | mdk_rdev_t *rdev; |
2838 | struct list_head *tmp; | 3293 | struct list_head *tmp; |
2839 | struct gendisk *disk; | 3294 | struct gendisk *disk; |
@@ -2841,7 +3296,7 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2841 | 3296 | ||
2842 | bitmap_destroy(mddev); | 3297 | bitmap_destroy(mddev); |
2843 | if (mddev->bitmap_file) { | 3298 | if (mddev->bitmap_file) { |
2844 | atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1); | 3299 | restore_bitmap_write_access(mddev->bitmap_file); |
2845 | fput(mddev->bitmap_file); | 3300 | fput(mddev->bitmap_file); |
2846 | mddev->bitmap_file = NULL; | 3301 | mddev->bitmap_file = NULL; |
2847 | } | 3302 | } |
@@ -2857,11 +3312,15 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2857 | export_array(mddev); | 3312 | export_array(mddev); |
2858 | 3313 | ||
2859 | mddev->array_size = 0; | 3314 | mddev->array_size = 0; |
3315 | mddev->size = 0; | ||
3316 | mddev->raid_disks = 0; | ||
3317 | mddev->recovery_cp = 0; | ||
3318 | |||
2860 | disk = mddev->gendisk; | 3319 | disk = mddev->gendisk; |
2861 | if (disk) | 3320 | if (disk) |
2862 | set_capacity(disk, 0); | 3321 | set_capacity(disk, 0); |
2863 | mddev->changed = 1; | 3322 | mddev->changed = 1; |
2864 | } else | 3323 | } else if (mddev->pers) |
2865 | printk(KERN_INFO "md: %s switched to read-only mode.\n", | 3324 | printk(KERN_INFO "md: %s switched to read-only mode.\n", |
2866 | mdname(mddev)); | 3325 | mdname(mddev)); |
2867 | err = 0; | 3326 | err = 0; |
@@ -3264,6 +3723,17 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
3264 | 3723 | ||
3265 | rdev->raid_disk = -1; | 3724 | rdev->raid_disk = -1; |
3266 | err = bind_rdev_to_array(rdev, mddev); | 3725 | err = bind_rdev_to_array(rdev, mddev); |
3726 | if (!err && !mddev->pers->hot_remove_disk) { | ||
3727 | /* If there is hot_add_disk but no hot_remove_disk | ||
3728 | * then added disks are for geometry changes, | ||
3729 | * and should be added immediately. | ||
3730 | */ | ||
3731 | super_types[mddev->major_version]. | ||
3732 | validate_super(mddev, rdev); | ||
3733 | err = mddev->pers->hot_add_disk(mddev, rdev); | ||
3734 | if (err) | ||
3735 | unbind_rdev_from_array(rdev); | ||
3736 | } | ||
3267 | if (err) | 3737 | if (err) |
3268 | export_rdev(rdev); | 3738 | export_rdev(rdev); |
3269 | 3739 | ||
@@ -3434,23 +3904,6 @@ abort_export: | |||
3434 | return err; | 3904 | return err; |
3435 | } | 3905 | } |
3436 | 3906 | ||
3437 | /* similar to deny_write_access, but accounts for our holding a reference | ||
3438 | * to the file ourselves */ | ||
3439 | static int deny_bitmap_write_access(struct file * file) | ||
3440 | { | ||
3441 | struct inode *inode = file->f_mapping->host; | ||
3442 | |||
3443 | spin_lock(&inode->i_lock); | ||
3444 | if (atomic_read(&inode->i_writecount) > 1) { | ||
3445 | spin_unlock(&inode->i_lock); | ||
3446 | return -ETXTBSY; | ||
3447 | } | ||
3448 | atomic_set(&inode->i_writecount, -1); | ||
3449 | spin_unlock(&inode->i_lock); | ||
3450 | |||
3451 | return 0; | ||
3452 | } | ||
3453 | |||
3454 | static int set_bitmap_file(mddev_t *mddev, int fd) | 3907 | static int set_bitmap_file(mddev_t *mddev, int fd) |
3455 | { | 3908 | { |
3456 | int err; | 3909 | int err; |
@@ -3491,12 +3944,17 @@ static int set_bitmap_file(mddev_t *mddev, int fd) | |||
3491 | mddev->pers->quiesce(mddev, 1); | 3944 | mddev->pers->quiesce(mddev, 1); |
3492 | if (fd >= 0) | 3945 | if (fd >= 0) |
3493 | err = bitmap_create(mddev); | 3946 | err = bitmap_create(mddev); |
3494 | if (fd < 0 || err) | 3947 | if (fd < 0 || err) { |
3495 | bitmap_destroy(mddev); | 3948 | bitmap_destroy(mddev); |
3949 | fd = -1; /* make sure to put the file */ | ||
3950 | } | ||
3496 | mddev->pers->quiesce(mddev, 0); | 3951 | mddev->pers->quiesce(mddev, 0); |
3497 | } else if (fd < 0) { | 3952 | } |
3498 | if (mddev->bitmap_file) | 3953 | if (fd < 0) { |
3954 | if (mddev->bitmap_file) { | ||
3955 | restore_bitmap_write_access(mddev->bitmap_file); | ||
3499 | fput(mddev->bitmap_file); | 3956 | fput(mddev->bitmap_file); |
3957 | } | ||
3500 | mddev->bitmap_file = NULL; | 3958 | mddev->bitmap_file = NULL; |
3501 | } | 3959 | } |
3502 | 3960 | ||
@@ -3977,11 +4435,6 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
3977 | goto done_unlock; | 4435 | goto done_unlock; |
3978 | 4436 | ||
3979 | default: | 4437 | default: |
3980 | if (_IOC_TYPE(cmd) == MD_MAJOR) | ||
3981 | printk(KERN_WARNING "md: %s(pid %d) used" | ||
3982 | " obsolete MD ioctl, upgrade your" | ||
3983 | " software to use new ictls.\n", | ||
3984 | current->comm, current->pid); | ||
3985 | err = -EINVAL; | 4438 | err = -EINVAL; |
3986 | goto abort_unlock; | 4439 | goto abort_unlock; |
3987 | } | 4440 | } |
@@ -4152,6 +4605,8 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
4152 | __builtin_return_address(0),__builtin_return_address(1), | 4605 | __builtin_return_address(0),__builtin_return_address(1), |
4153 | __builtin_return_address(2),__builtin_return_address(3)); | 4606 | __builtin_return_address(2),__builtin_return_address(3)); |
4154 | */ | 4607 | */ |
4608 | if (!mddev->pers) | ||
4609 | return; | ||
4155 | if (!mddev->pers->error_handler) | 4610 | if (!mddev->pers->error_handler) |
4156 | return; | 4611 | return; |
4157 | mddev->pers->error_handler(mddev,rdev); | 4612 | mddev->pers->error_handler(mddev,rdev); |
@@ -4249,12 +4704,13 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
4249 | */ | 4704 | */ |
4250 | dt = ((jiffies - mddev->resync_mark) / HZ); | 4705 | dt = ((jiffies - mddev->resync_mark) / HZ); |
4251 | if (!dt) dt++; | 4706 | if (!dt) dt++; |
4252 | db = resync - (mddev->resync_mark_cnt/2); | 4707 | db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)) |
4253 | rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100; | 4708 | - mddev->resync_mark_cnt; |
4709 | rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100; | ||
4254 | 4710 | ||
4255 | seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); | 4711 | seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); |
4256 | 4712 | ||
4257 | seq_printf(seq, " speed=%ldK/sec", db/dt); | 4713 | seq_printf(seq, " speed=%ldK/sec", db/2/dt); |
4258 | } | 4714 | } |
4259 | 4715 | ||
4260 | static void *md_seq_start(struct seq_file *seq, loff_t *pos) | 4716 | static void *md_seq_start(struct seq_file *seq, loff_t *pos) |
@@ -4586,7 +5042,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) | |||
4586 | spin_lock_irq(&mddev->write_lock); | 5042 | spin_lock_irq(&mddev->write_lock); |
4587 | if (mddev->in_sync) { | 5043 | if (mddev->in_sync) { |
4588 | mddev->in_sync = 0; | 5044 | mddev->in_sync = 0; |
4589 | mddev->sb_dirty = 1; | 5045 | mddev->sb_dirty = 3; |
4590 | md_wakeup_thread(mddev->thread); | 5046 | md_wakeup_thread(mddev->thread); |
4591 | } | 5047 | } |
4592 | spin_unlock_irq(&mddev->write_lock); | 5048 | spin_unlock_irq(&mddev->write_lock); |
@@ -4599,7 +5055,7 @@ void md_write_end(mddev_t *mddev) | |||
4599 | if (atomic_dec_and_test(&mddev->writes_pending)) { | 5055 | if (atomic_dec_and_test(&mddev->writes_pending)) { |
4600 | if (mddev->safemode == 2) | 5056 | if (mddev->safemode == 2) |
4601 | md_wakeup_thread(mddev->thread); | 5057 | md_wakeup_thread(mddev->thread); |
4602 | else | 5058 | else if (mddev->safemode_delay) |
4603 | mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay); | 5059 | mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay); |
4604 | } | 5060 | } |
4605 | } | 5061 | } |
@@ -4620,10 +5076,14 @@ void md_do_sync(mddev_t *mddev) | |||
4620 | struct list_head *tmp; | 5076 | struct list_head *tmp; |
4621 | sector_t last_check; | 5077 | sector_t last_check; |
4622 | int skipped = 0; | 5078 | int skipped = 0; |
5079 | struct list_head *rtmp; | ||
5080 | mdk_rdev_t *rdev; | ||
4623 | 5081 | ||
4624 | /* just in case thread restarts... */ | 5082 | /* just in case thread restarts... */ |
4625 | if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) | 5083 | if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) |
4626 | return; | 5084 | return; |
5085 | if (mddev->ro) /* never try to sync a read-only array */ | ||
5086 | return; | ||
4627 | 5087 | ||
4628 | /* we overload curr_resync somewhat here. | 5088 | /* we overload curr_resync somewhat here. |
4629 | * 0 == not engaged in resync at all | 5089 | * 0 == not engaged in resync at all |
@@ -4682,17 +5142,30 @@ void md_do_sync(mddev_t *mddev) | |||
4682 | } | 5142 | } |
4683 | } while (mddev->curr_resync < 2); | 5143 | } while (mddev->curr_resync < 2); |
4684 | 5144 | ||
5145 | j = 0; | ||
4685 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 5146 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
4686 | /* resync follows the size requested by the personality, | 5147 | /* resync follows the size requested by the personality, |
4687 | * which defaults to physical size, but can be virtual size | 5148 | * which defaults to physical size, but can be virtual size |
4688 | */ | 5149 | */ |
4689 | max_sectors = mddev->resync_max_sectors; | 5150 | max_sectors = mddev->resync_max_sectors; |
4690 | mddev->resync_mismatches = 0; | 5151 | mddev->resync_mismatches = 0; |
5152 | /* we don't use the checkpoint if there's a bitmap */ | ||
5153 | if (!mddev->bitmap && | ||
5154 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
5155 | j = mddev->recovery_cp; | ||
4691 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 5156 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
4692 | max_sectors = mddev->size << 1; | 5157 | max_sectors = mddev->size << 1; |
4693 | else | 5158 | else { |
4694 | /* recovery follows the physical size of devices */ | 5159 | /* recovery follows the physical size of devices */ |
4695 | max_sectors = mddev->size << 1; | 5160 | max_sectors = mddev->size << 1; |
5161 | j = MaxSector; | ||
5162 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
5163 | if (rdev->raid_disk >= 0 && | ||
5164 | !test_bit(Faulty, &rdev->flags) && | ||
5165 | !test_bit(In_sync, &rdev->flags) && | ||
5166 | rdev->recovery_offset < j) | ||
5167 | j = rdev->recovery_offset; | ||
5168 | } | ||
4696 | 5169 | ||
4697 | printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); | 5170 | printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); |
4698 | printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" | 5171 | printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" |
@@ -4702,12 +5175,7 @@ void md_do_sync(mddev_t *mddev) | |||
4702 | speed_max(mddev)); | 5175 | speed_max(mddev)); |
4703 | 5176 | ||
4704 | is_mddev_idle(mddev); /* this also initializes IO event counters */ | 5177 | is_mddev_idle(mddev); /* this also initializes IO event counters */ |
4705 | /* we don't use the checkpoint if there's a bitmap */ | 5178 | |
4706 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap | ||
4707 | && ! test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
4708 | j = mddev->recovery_cp; | ||
4709 | else | ||
4710 | j = 0; | ||
4711 | io_sectors = 0; | 5179 | io_sectors = 0; |
4712 | for (m = 0; m < SYNC_MARKS; m++) { | 5180 | for (m = 0; m < SYNC_MARKS; m++) { |
4713 | mark[m] = jiffies; | 5181 | mark[m] = jiffies; |
@@ -4753,6 +5221,7 @@ void md_do_sync(mddev_t *mddev) | |||
4753 | 5221 | ||
4754 | j += sectors; | 5222 | j += sectors; |
4755 | if (j>1) mddev->curr_resync = j; | 5223 | if (j>1) mddev->curr_resync = j; |
5224 | mddev->curr_mark_cnt = io_sectors; | ||
4756 | if (last_check == 0) | 5225 | if (last_check == 0) |
4757 | /* this is the earliers that rebuilt will be | 5226 | /* this is the earliers that rebuilt will be |
4758 | * visible in /proc/mdstat | 5227 | * visible in /proc/mdstat |
@@ -4828,15 +5297,28 @@ void md_do_sync(mddev_t *mddev) | |||
4828 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && | 5297 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && |
4829 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && | 5298 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && |
4830 | !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && | 5299 | !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && |
4831 | mddev->curr_resync > 2 && | 5300 | mddev->curr_resync > 2) { |
4832 | mddev->curr_resync >= mddev->recovery_cp) { | 5301 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
4833 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 5302 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
4834 | printk(KERN_INFO | 5303 | if (mddev->curr_resync >= mddev->recovery_cp) { |
4835 | "md: checkpointing recovery of %s.\n", | 5304 | printk(KERN_INFO |
4836 | mdname(mddev)); | 5305 | "md: checkpointing recovery of %s.\n", |
4837 | mddev->recovery_cp = mddev->curr_resync; | 5306 | mdname(mddev)); |
4838 | } else | 5307 | mddev->recovery_cp = mddev->curr_resync; |
4839 | mddev->recovery_cp = MaxSector; | 5308 | } |
5309 | } else | ||
5310 | mddev->recovery_cp = MaxSector; | ||
5311 | } else { | ||
5312 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | ||
5313 | mddev->curr_resync = MaxSector; | ||
5314 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
5315 | if (rdev->raid_disk >= 0 && | ||
5316 | !test_bit(Faulty, &rdev->flags) && | ||
5317 | !test_bit(In_sync, &rdev->flags) && | ||
5318 | rdev->recovery_offset < mddev->curr_resync) | ||
5319 | rdev->recovery_offset = mddev->curr_resync; | ||
5320 | mddev->sb_dirty = 1; | ||
5321 | } | ||
4840 | } | 5322 | } |
4841 | 5323 | ||
4842 | skip: | 5324 | skip: |
@@ -4908,7 +5390,7 @@ void md_check_recovery(mddev_t *mddev) | |||
4908 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | 5390 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && |
4909 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | 5391 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { |
4910 | mddev->in_sync = 1; | 5392 | mddev->in_sync = 1; |
4911 | mddev->sb_dirty = 1; | 5393 | mddev->sb_dirty = 3; |
4912 | } | 5394 | } |
4913 | if (mddev->safemode == 1) | 5395 | if (mddev->safemode == 1) |
4914 | mddev->safemode = 0; | 5396 | mddev->safemode = 0; |
@@ -4957,6 +5439,8 @@ void md_check_recovery(mddev_t *mddev) | |||
4957 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); | 5439 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); |
4958 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); | 5440 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); |
4959 | 5441 | ||
5442 | if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) | ||
5443 | goto unlock; | ||
4960 | /* no recovery is running. | 5444 | /* no recovery is running. |
4961 | * remove any failed drives, then | 5445 | * remove any failed drives, then |
4962 | * add spares if possible. | 5446 | * add spares if possible. |
@@ -4979,6 +5463,7 @@ void md_check_recovery(mddev_t *mddev) | |||
4979 | ITERATE_RDEV(mddev,rdev,rtmp) | 5463 | ITERATE_RDEV(mddev,rdev,rtmp) |
4980 | if (rdev->raid_disk < 0 | 5464 | if (rdev->raid_disk < 0 |
4981 | && !test_bit(Faulty, &rdev->flags)) { | 5465 | && !test_bit(Faulty, &rdev->flags)) { |
5466 | rdev->recovery_offset = 0; | ||
4982 | if (mddev->pers->hot_add_disk(mddev,rdev)) { | 5467 | if (mddev->pers->hot_add_disk(mddev,rdev)) { |
4983 | char nm[20]; | 5468 | char nm[20]; |
4984 | sprintf(nm, "rd%d", rdev->raid_disk); | 5469 | sprintf(nm, "rd%d", rdev->raid_disk); |
@@ -5071,8 +5556,6 @@ static void md_geninit(void) | |||
5071 | 5556 | ||
5072 | static int __init md_init(void) | 5557 | static int __init md_init(void) |
5073 | { | 5558 | { |
5074 | int minor; | ||
5075 | |||
5076 | printk(KERN_INFO "md: md driver %d.%d.%d MAX_MD_DEVS=%d," | 5559 | printk(KERN_INFO "md: md driver %d.%d.%d MAX_MD_DEVS=%d," |
5077 | " MD_SB_DISKS=%d\n", | 5560 | " MD_SB_DISKS=%d\n", |
5078 | MD_MAJOR_VERSION, MD_MINOR_VERSION, | 5561 | MD_MAJOR_VERSION, MD_MINOR_VERSION, |
@@ -5086,23 +5569,11 @@ static int __init md_init(void) | |||
5086 | unregister_blkdev(MAJOR_NR, "md"); | 5569 | unregister_blkdev(MAJOR_NR, "md"); |
5087 | return -1; | 5570 | return -1; |
5088 | } | 5571 | } |
5089 | devfs_mk_dir("md"); | ||
5090 | blk_register_region(MKDEV(MAJOR_NR, 0), MAX_MD_DEVS, THIS_MODULE, | 5572 | blk_register_region(MKDEV(MAJOR_NR, 0), MAX_MD_DEVS, THIS_MODULE, |
5091 | md_probe, NULL, NULL); | 5573 | md_probe, NULL, NULL); |
5092 | blk_register_region(MKDEV(mdp_major, 0), MAX_MD_DEVS<<MdpMinorShift, THIS_MODULE, | 5574 | blk_register_region(MKDEV(mdp_major, 0), MAX_MD_DEVS<<MdpMinorShift, THIS_MODULE, |
5093 | md_probe, NULL, NULL); | 5575 | md_probe, NULL, NULL); |
5094 | 5576 | ||
5095 | for (minor=0; minor < MAX_MD_DEVS; ++minor) | ||
5096 | devfs_mk_bdev(MKDEV(MAJOR_NR, minor), | ||
5097 | S_IFBLK|S_IRUSR|S_IWUSR, | ||
5098 | "md/%d", minor); | ||
5099 | |||
5100 | for (minor=0; minor < MAX_MD_DEVS; ++minor) | ||
5101 | devfs_mk_bdev(MKDEV(mdp_major, minor<<MdpMinorShift), | ||
5102 | S_IFBLK|S_IRUSR|S_IWUSR, | ||
5103 | "md/mdp%d", minor); | ||
5104 | |||
5105 | |||
5106 | register_reboot_notifier(&md_notifier); | 5577 | register_reboot_notifier(&md_notifier); |
5107 | raid_table_header = register_sysctl_table(raid_root_table, 1); | 5578 | raid_table_header = register_sysctl_table(raid_root_table, 1); |
5108 | 5579 | ||
@@ -5158,15 +5629,9 @@ static __exit void md_exit(void) | |||
5158 | { | 5629 | { |
5159 | mddev_t *mddev; | 5630 | mddev_t *mddev; |
5160 | struct list_head *tmp; | 5631 | struct list_head *tmp; |
5161 | int i; | 5632 | |
5162 | blk_unregister_region(MKDEV(MAJOR_NR,0), MAX_MD_DEVS); | 5633 | blk_unregister_region(MKDEV(MAJOR_NR,0), MAX_MD_DEVS); |
5163 | blk_unregister_region(MKDEV(mdp_major,0), MAX_MD_DEVS << MdpMinorShift); | 5634 | blk_unregister_region(MKDEV(mdp_major,0), MAX_MD_DEVS << MdpMinorShift); |
5164 | for (i=0; i < MAX_MD_DEVS; i++) | ||
5165 | devfs_remove("md/%d", i); | ||
5166 | for (i=0; i < MAX_MD_DEVS; i++) | ||
5167 | devfs_remove("md/d%d", i); | ||
5168 | |||
5169 | devfs_remove("md"); | ||
5170 | 5635 | ||
5171 | unregister_blkdev(MAJOR_NR,"md"); | 5636 | unregister_blkdev(MAJOR_NR,"md"); |
5172 | unregister_blkdev(mdp_major, "mdp"); | 5637 | unregister_blkdev(mdp_major, "mdp"); |
@@ -5203,8 +5668,8 @@ static int set_ro(const char *val, struct kernel_param *kp) | |||
5203 | return -EINVAL; | 5668 | return -EINVAL; |
5204 | } | 5669 | } |
5205 | 5670 | ||
5206 | module_param_call(start_ro, set_ro, get_ro, NULL, 0600); | 5671 | module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); |
5207 | module_param(start_dirty_degraded, int, 0644); | 5672 | module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); |
5208 | 5673 | ||
5209 | 5674 | ||
5210 | EXPORT_SYMBOL(register_md_personality); | 5675 | EXPORT_SYMBOL(register_md_personality); |
@@ -5216,7 +5681,6 @@ EXPORT_SYMBOL(md_write_end); | |||
5216 | EXPORT_SYMBOL(md_register_thread); | 5681 | EXPORT_SYMBOL(md_register_thread); |
5217 | EXPORT_SYMBOL(md_unregister_thread); | 5682 | EXPORT_SYMBOL(md_unregister_thread); |
5218 | EXPORT_SYMBOL(md_wakeup_thread); | 5683 | EXPORT_SYMBOL(md_wakeup_thread); |
5219 | EXPORT_SYMBOL(md_print_devices); | ||
5220 | EXPORT_SYMBOL(md_check_recovery); | 5684 | EXPORT_SYMBOL(md_check_recovery); |
5221 | MODULE_LICENSE("GPL"); | 5685 | MODULE_LICENSE("GPL"); |
5222 | MODULE_ALIAS("md"); | 5686 | MODULE_ALIAS("md"); |