diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 634 |
1 files changed, 550 insertions, 84 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index f19b874753a9..306268ec99ff 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <linux/suspend.h> | 44 | #include <linux/suspend.h> |
45 | #include <linux/poll.h> | 45 | #include <linux/poll.h> |
46 | #include <linux/mutex.h> | 46 | #include <linux/mutex.h> |
47 | #include <linux/ctype.h> | ||
47 | 48 | ||
48 | #include <linux/init.h> | 49 | #include <linux/init.h> |
49 | 50 | ||
@@ -72,6 +73,10 @@ static void autostart_arrays (int part); | |||
72 | static LIST_HEAD(pers_list); | 73 | static LIST_HEAD(pers_list); |
73 | static DEFINE_SPINLOCK(pers_lock); | 74 | static DEFINE_SPINLOCK(pers_lock); |
74 | 75 | ||
76 | static void md_print_devices(void); | ||
77 | |||
/*
 * Report an internal inconsistency: print the file and line of the call
 * site, then call md_print_devices().  Any macro arguments are accepted
 * but ignored.
 *
 * The body is wrapped in do { } while (0) so that "MD_BUG();" expands to
 * exactly one statement and stays safe inside an unbraced if/else.
 */
#define MD_BUG(x...) do { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } while (0)
79 | |||
75 | /* | 80 | /* |
76 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' | 81 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' |
77 | * is 1000 KB/sec, so the extra system load does not show up that much. | 82 | * is 1000 KB/sec, so the extra system load does not show up that much. |
@@ -170,7 +175,7 @@ EXPORT_SYMBOL_GPL(md_new_event); | |||
170 | /* Alternate version that can be called from interrupts | 175 | /* Alternate version that can be called from interrupts |
171 | * when calling sysfs_notify isn't needed. | 176 | * when calling sysfs_notify isn't needed. |
172 | */ | 177 | */ |
173 | void md_new_event_inintr(mddev_t *mddev) | 178 | static void md_new_event_inintr(mddev_t *mddev) |
174 | { | 179 | { |
175 | atomic_inc(&md_event_count); | 180 | atomic_inc(&md_event_count); |
176 | wake_up(&md_event_waiters); | 181 | wake_up(&md_event_waiters); |
@@ -732,6 +737,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
732 | { | 737 | { |
733 | mdp_disk_t *desc; | 738 | mdp_disk_t *desc; |
734 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); | 739 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); |
740 | __u64 ev1 = md_event(sb); | ||
735 | 741 | ||
736 | rdev->raid_disk = -1; | 742 | rdev->raid_disk = -1; |
737 | rdev->flags = 0; | 743 | rdev->flags = 0; |
@@ -748,7 +754,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
748 | mddev->layout = sb->layout; | 754 | mddev->layout = sb->layout; |
749 | mddev->raid_disks = sb->raid_disks; | 755 | mddev->raid_disks = sb->raid_disks; |
750 | mddev->size = sb->size; | 756 | mddev->size = sb->size; |
751 | mddev->events = md_event(sb); | 757 | mddev->events = ev1; |
752 | mddev->bitmap_offset = 0; | 758 | mddev->bitmap_offset = 0; |
753 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 759 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; |
754 | 760 | ||
@@ -797,7 +803,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
797 | 803 | ||
798 | } else if (mddev->pers == NULL) { | 804 | } else if (mddev->pers == NULL) { |
799 | /* Insist on good event counter while assembling */ | 805 | /* Insist on good event counter while assembling */ |
800 | __u64 ev1 = md_event(sb); | ||
801 | ++ev1; | 806 | ++ev1; |
802 | if (ev1 < mddev->events) | 807 | if (ev1 < mddev->events) |
803 | return -EINVAL; | 808 | return -EINVAL; |
@@ -805,19 +810,21 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
805 | /* if adding to array with a bitmap, then we can accept an | 810 | /* if adding to array with a bitmap, then we can accept an |
806 | * older device ... but not too old. | 811 | * older device ... but not too old. |
807 | */ | 812 | */ |
808 | __u64 ev1 = md_event(sb); | ||
809 | if (ev1 < mddev->bitmap->events_cleared) | 813 | if (ev1 < mddev->bitmap->events_cleared) |
810 | return 0; | 814 | return 0; |
811 | } else /* just a hot-add of a new device, leave raid_disk at -1 */ | 815 | } else { |
812 | return 0; | 816 | if (ev1 < mddev->events) |
817 | /* just a hot-add of a new device, leave raid_disk at -1 */ | ||
818 | return 0; | ||
819 | } | ||
813 | 820 | ||
814 | if (mddev->level != LEVEL_MULTIPATH) { | 821 | if (mddev->level != LEVEL_MULTIPATH) { |
815 | desc = sb->disks + rdev->desc_nr; | 822 | desc = sb->disks + rdev->desc_nr; |
816 | 823 | ||
817 | if (desc->state & (1<<MD_DISK_FAULTY)) | 824 | if (desc->state & (1<<MD_DISK_FAULTY)) |
818 | set_bit(Faulty, &rdev->flags); | 825 | set_bit(Faulty, &rdev->flags); |
819 | else if (desc->state & (1<<MD_DISK_SYNC) && | 826 | else if (desc->state & (1<<MD_DISK_SYNC) /* && |
820 | desc->raid_disk < mddev->raid_disks) { | 827 | desc->raid_disk < mddev->raid_disks */) { |
821 | set_bit(In_sync, &rdev->flags); | 828 | set_bit(In_sync, &rdev->flags); |
822 | rdev->raid_disk = desc->raid_disk; | 829 | rdev->raid_disk = desc->raid_disk; |
823 | } | 830 | } |
@@ -1100,6 +1107,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
1100 | static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | 1107 | static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) |
1101 | { | 1108 | { |
1102 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); | 1109 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); |
1110 | __u64 ev1 = le64_to_cpu(sb->events); | ||
1103 | 1111 | ||
1104 | rdev->raid_disk = -1; | 1112 | rdev->raid_disk = -1; |
1105 | rdev->flags = 0; | 1113 | rdev->flags = 0; |
@@ -1115,7 +1123,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1115 | mddev->layout = le32_to_cpu(sb->layout); | 1123 | mddev->layout = le32_to_cpu(sb->layout); |
1116 | mddev->raid_disks = le32_to_cpu(sb->raid_disks); | 1124 | mddev->raid_disks = le32_to_cpu(sb->raid_disks); |
1117 | mddev->size = le64_to_cpu(sb->size)/2; | 1125 | mddev->size = le64_to_cpu(sb->size)/2; |
1118 | mddev->events = le64_to_cpu(sb->events); | 1126 | mddev->events = ev1; |
1119 | mddev->bitmap_offset = 0; | 1127 | mddev->bitmap_offset = 0; |
1120 | mddev->default_bitmap_offset = 1024 >> 9; | 1128 | mddev->default_bitmap_offset = 1024 >> 9; |
1121 | 1129 | ||
@@ -1149,7 +1157,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1149 | 1157 | ||
1150 | } else if (mddev->pers == NULL) { | 1158 | } else if (mddev->pers == NULL) { |
1151 | /* Insist of good event counter while assembling */ | 1159 | /* Insist of good event counter while assembling */ |
1152 | __u64 ev1 = le64_to_cpu(sb->events); | ||
1153 | ++ev1; | 1160 | ++ev1; |
1154 | if (ev1 < mddev->events) | 1161 | if (ev1 < mddev->events) |
1155 | return -EINVAL; | 1162 | return -EINVAL; |
@@ -1157,12 +1164,13 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1157 | /* If adding to array with a bitmap, then we can accept an | 1164 | /* If adding to array with a bitmap, then we can accept an |
1158 | * older device, but not too old. | 1165 | * older device, but not too old. |
1159 | */ | 1166 | */ |
1160 | __u64 ev1 = le64_to_cpu(sb->events); | ||
1161 | if (ev1 < mddev->bitmap->events_cleared) | 1167 | if (ev1 < mddev->bitmap->events_cleared) |
1162 | return 0; | 1168 | return 0; |
1163 | } else /* just a hot-add of a new device, leave raid_disk at -1 */ | 1169 | } else { |
1164 | return 0; | 1170 | if (ev1 < mddev->events) |
1165 | 1171 | /* just a hot-add of a new device, leave raid_disk at -1 */ | |
1172 | return 0; | ||
1173 | } | ||
1166 | if (mddev->level != LEVEL_MULTIPATH) { | 1174 | if (mddev->level != LEVEL_MULTIPATH) { |
1167 | int role; | 1175 | int role; |
1168 | rdev->desc_nr = le32_to_cpu(sb->dev_number); | 1176 | rdev->desc_nr = le32_to_cpu(sb->dev_number); |
@@ -1174,7 +1182,11 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1174 | set_bit(Faulty, &rdev->flags); | 1182 | set_bit(Faulty, &rdev->flags); |
1175 | break; | 1183 | break; |
1176 | default: | 1184 | default: |
1177 | set_bit(In_sync, &rdev->flags); | 1185 | if ((le32_to_cpu(sb->feature_map) & |
1186 | MD_FEATURE_RECOVERY_OFFSET)) | ||
1187 | rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); | ||
1188 | else | ||
1189 | set_bit(In_sync, &rdev->flags); | ||
1178 | rdev->raid_disk = role; | 1190 | rdev->raid_disk = role; |
1179 | break; | 1191 | break; |
1180 | } | 1192 | } |
@@ -1198,6 +1210,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1198 | 1210 | ||
1199 | sb->feature_map = 0; | 1211 | sb->feature_map = 0; |
1200 | sb->pad0 = 0; | 1212 | sb->pad0 = 0; |
1213 | sb->recovery_offset = cpu_to_le64(0); | ||
1201 | memset(sb->pad1, 0, sizeof(sb->pad1)); | 1214 | memset(sb->pad1, 0, sizeof(sb->pad1)); |
1202 | memset(sb->pad2, 0, sizeof(sb->pad2)); | 1215 | memset(sb->pad2, 0, sizeof(sb->pad2)); |
1203 | memset(sb->pad3, 0, sizeof(sb->pad3)); | 1216 | memset(sb->pad3, 0, sizeof(sb->pad3)); |
@@ -1218,6 +1231,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1218 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); | 1231 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); |
1219 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); | 1232 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); |
1220 | } | 1233 | } |
1234 | |||
1235 | if (rdev->raid_disk >= 0 && | ||
1236 | !test_bit(In_sync, &rdev->flags) && | ||
1237 | rdev->recovery_offset > 0) { | ||
1238 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); | ||
1239 | sb->recovery_offset = cpu_to_le64(rdev->recovery_offset); | ||
1240 | } | ||
1241 | |||
1221 | if (mddev->reshape_position != MaxSector) { | 1242 | if (mddev->reshape_position != MaxSector) { |
1222 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); | 1243 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); |
1223 | sb->reshape_position = cpu_to_le64(mddev->reshape_position); | 1244 | sb->reshape_position = cpu_to_le64(mddev->reshape_position); |
@@ -1242,11 +1263,12 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1242 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1263 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1243 | else if (test_bit(In_sync, &rdev2->flags)) | 1264 | else if (test_bit(In_sync, &rdev2->flags)) |
1244 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); | 1265 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); |
1266 | else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0) | ||
1267 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); | ||
1245 | else | 1268 | else |
1246 | sb->dev_roles[i] = cpu_to_le16(0xffff); | 1269 | sb->dev_roles[i] = cpu_to_le16(0xffff); |
1247 | } | 1270 | } |
1248 | 1271 | ||
1249 | sb->recovery_offset = cpu_to_le64(0); /* not supported yet */ | ||
1250 | sb->sb_csum = calc_sb_1_csum(sb); | 1272 | sb->sb_csum = calc_sb_1_csum(sb); |
1251 | } | 1273 | } |
1252 | 1274 | ||
@@ -1507,7 +1529,7 @@ static void print_rdev(mdk_rdev_t *rdev) | |||
1507 | printk(KERN_INFO "md: no rdev superblock!\n"); | 1529 | printk(KERN_INFO "md: no rdev superblock!\n"); |
1508 | } | 1530 | } |
1509 | 1531 | ||
1510 | void md_print_devices(void) | 1532 | static void md_print_devices(void) |
1511 | { | 1533 | { |
1512 | struct list_head *tmp, *tmp2; | 1534 | struct list_head *tmp, *tmp2; |
1513 | mdk_rdev_t *rdev; | 1535 | mdk_rdev_t *rdev; |
@@ -1536,15 +1558,30 @@ void md_print_devices(void) | |||
1536 | } | 1558 | } |
1537 | 1559 | ||
1538 | 1560 | ||
1539 | static void sync_sbs(mddev_t * mddev) | 1561 | static void sync_sbs(mddev_t * mddev, int nospares) |
1540 | { | 1562 | { |
1563 | /* Update each superblock (in-memory image), but | ||
1564 | * if we are allowed to, skip spares which already | ||
1565 | * have the right event counter, or have one earlier | ||
1566 | * (which would mean they aren't being marked as dirty | ||
1567 | * with the rest of the array) | ||
1568 | */ | ||
1541 | mdk_rdev_t *rdev; | 1569 | mdk_rdev_t *rdev; |
1542 | struct list_head *tmp; | 1570 | struct list_head *tmp; |
1543 | 1571 | ||
1544 | ITERATE_RDEV(mddev,rdev,tmp) { | 1572 | ITERATE_RDEV(mddev,rdev,tmp) { |
1545 | super_types[mddev->major_version]. | 1573 | if (rdev->sb_events == mddev->events || |
1546 | sync_super(mddev, rdev); | 1574 | (nospares && |
1547 | rdev->sb_loaded = 1; | 1575 | rdev->raid_disk < 0 && |
1576 | (rdev->sb_events&1)==0 && | ||
1577 | rdev->sb_events+1 == mddev->events)) { | ||
1578 | /* Don't update this superblock */ | ||
1579 | rdev->sb_loaded = 2; | ||
1580 | } else { | ||
1581 | super_types[mddev->major_version]. | ||
1582 | sync_super(mddev, rdev); | ||
1583 | rdev->sb_loaded = 1; | ||
1584 | } | ||
1548 | } | 1585 | } |
1549 | } | 1586 | } |
1550 | 1587 | ||
@@ -1554,12 +1591,42 @@ void md_update_sb(mddev_t * mddev) | |||
1554 | struct list_head *tmp; | 1591 | struct list_head *tmp; |
1555 | mdk_rdev_t *rdev; | 1592 | mdk_rdev_t *rdev; |
1556 | int sync_req; | 1593 | int sync_req; |
1594 | int nospares = 0; | ||
1557 | 1595 | ||
1558 | repeat: | 1596 | repeat: |
1559 | spin_lock_irq(&mddev->write_lock); | 1597 | spin_lock_irq(&mddev->write_lock); |
1560 | sync_req = mddev->in_sync; | 1598 | sync_req = mddev->in_sync; |
1561 | mddev->utime = get_seconds(); | 1599 | mddev->utime = get_seconds(); |
1562 | mddev->events ++; | 1600 | if (mddev->sb_dirty == 3) |
1601 | /* just a clean<-> dirty transition, possibly leave spares alone, | ||
1602 | * though if events isn't the right even/odd, we will have to do | ||
1603 | * spares after all | ||
1604 | */ | ||
1605 | nospares = 1; | ||
1606 | |||
1607 | /* If this is just a dirty<->clean transition, and the array is clean | ||
1608 | * and 'events' is odd, we can roll back to the previous clean state */ | ||
1609 | if (mddev->sb_dirty == 3 | ||
1610 | && (mddev->in_sync && mddev->recovery_cp == MaxSector) | ||
1611 | && (mddev->events & 1)) | ||
1612 | mddev->events--; | ||
1613 | else { | ||
1614 | /* otherwise we have to go forward and ... */ | ||
1615 | mddev->events ++; | ||
1616 | if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ | ||
1617 | /* .. if the array isn't clean, insist on an odd 'events' */ | ||
1618 | if ((mddev->events&1)==0) { | ||
1619 | mddev->events++; | ||
1620 | nospares = 0; | ||
1621 | } | ||
1622 | } else { | ||
1623 | /* otherwise insist on an even 'events' (for clean states) */ | ||
1624 | if ((mddev->events&1)) { | ||
1625 | mddev->events++; | ||
1626 | nospares = 0; | ||
1627 | } | ||
1628 | } | ||
1629 | } | ||
1563 | 1630 | ||
1564 | if (!mddev->events) { | 1631 | if (!mddev->events) { |
1565 | /* | 1632 | /* |
@@ -1571,7 +1638,7 @@ repeat: | |||
1571 | mddev->events --; | 1638 | mddev->events --; |
1572 | } | 1639 | } |
1573 | mddev->sb_dirty = 2; | 1640 | mddev->sb_dirty = 2; |
1574 | sync_sbs(mddev); | 1641 | sync_sbs(mddev, nospares); |
1575 | 1642 | ||
1576 | /* | 1643 | /* |
1577 | * do not write anything to disk if using | 1644 | * do not write anything to disk if using |
@@ -1593,6 +1660,8 @@ repeat: | |||
1593 | ITERATE_RDEV(mddev,rdev,tmp) { | 1660 | ITERATE_RDEV(mddev,rdev,tmp) { |
1594 | char b[BDEVNAME_SIZE]; | 1661 | char b[BDEVNAME_SIZE]; |
1595 | dprintk(KERN_INFO "md: "); | 1662 | dprintk(KERN_INFO "md: "); |
1663 | if (rdev->sb_loaded != 1) | ||
1664 | continue; /* no noise on spare devices */ | ||
1596 | if (test_bit(Faulty, &rdev->flags)) | 1665 | if (test_bit(Faulty, &rdev->flags)) |
1597 | dprintk("(skipping faulty "); | 1666 | dprintk("(skipping faulty "); |
1598 | 1667 | ||
@@ -1604,6 +1673,7 @@ repeat: | |||
1604 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", | 1673 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", |
1605 | bdevname(rdev->bdev,b), | 1674 | bdevname(rdev->bdev,b), |
1606 | (unsigned long long)rdev->sb_offset); | 1675 | (unsigned long long)rdev->sb_offset); |
1676 | rdev->sb_events = mddev->events; | ||
1607 | 1677 | ||
1608 | } else | 1678 | } else |
1609 | dprintk(")\n"); | 1679 | dprintk(")\n"); |
@@ -1667,6 +1737,10 @@ state_show(mdk_rdev_t *rdev, char *page) | |||
1667 | len += sprintf(page+len, "%sin_sync",sep); | 1737 | len += sprintf(page+len, "%sin_sync",sep); |
1668 | sep = ","; | 1738 | sep = ","; |
1669 | } | 1739 | } |
1740 | if (test_bit(WriteMostly, &rdev->flags)) { | ||
1741 | len += sprintf(page+len, "%swrite_mostly",sep); | ||
1742 | sep = ","; | ||
1743 | } | ||
1670 | if (!test_bit(Faulty, &rdev->flags) && | 1744 | if (!test_bit(Faulty, &rdev->flags) && |
1671 | !test_bit(In_sync, &rdev->flags)) { | 1745 | !test_bit(In_sync, &rdev->flags)) { |
1672 | len += sprintf(page+len, "%sspare", sep); | 1746 | len += sprintf(page+len, "%sspare", sep); |
@@ -1675,8 +1749,40 @@ state_show(mdk_rdev_t *rdev, char *page) | |||
1675 | return len+sprintf(page+len, "\n"); | 1749 | return len+sprintf(page+len, "\n"); |
1676 | } | 1750 | } |
1677 | 1751 | ||
1752 | static ssize_t | ||
1753 | state_store(mdk_rdev_t *rdev, const char *buf, size_t len) | ||
1754 | { | ||
1755 | /* can write | ||
1756 | * faulty - simulates and error | ||
1757 | * remove - disconnects the device | ||
1758 | * writemostly - sets write_mostly | ||
1759 | * -writemostly - clears write_mostly | ||
1760 | */ | ||
1761 | int err = -EINVAL; | ||
1762 | if (cmd_match(buf, "faulty") && rdev->mddev->pers) { | ||
1763 | md_error(rdev->mddev, rdev); | ||
1764 | err = 0; | ||
1765 | } else if (cmd_match(buf, "remove")) { | ||
1766 | if (rdev->raid_disk >= 0) | ||
1767 | err = -EBUSY; | ||
1768 | else { | ||
1769 | mddev_t *mddev = rdev->mddev; | ||
1770 | kick_rdev_from_array(rdev); | ||
1771 | md_update_sb(mddev); | ||
1772 | md_new_event(mddev); | ||
1773 | err = 0; | ||
1774 | } | ||
1775 | } else if (cmd_match(buf, "writemostly")) { | ||
1776 | set_bit(WriteMostly, &rdev->flags); | ||
1777 | err = 0; | ||
1778 | } else if (cmd_match(buf, "-writemostly")) { | ||
1779 | clear_bit(WriteMostly, &rdev->flags); | ||
1780 | err = 0; | ||
1781 | } | ||
1782 | return err ? err : len; | ||
1783 | } | ||
/* sysfs attribute "state" (mode 0644): read by state_show(), written by state_store() */
static struct rdev_sysfs_entry
rdev_state = __ATTR(state, 0644, state_show, state_store);
1680 | 1786 | ||
1681 | static ssize_t | 1787 | static ssize_t |
1682 | super_show(mdk_rdev_t *rdev, char *page) | 1788 | super_show(mdk_rdev_t *rdev, char *page) |
@@ -1873,6 +1979,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
1873 | rdev->desc_nr = -1; | 1979 | rdev->desc_nr = -1; |
1874 | rdev->flags = 0; | 1980 | rdev->flags = 0; |
1875 | rdev->data_offset = 0; | 1981 | rdev->data_offset = 0; |
1982 | rdev->sb_events = 0; | ||
1876 | atomic_set(&rdev->nr_pending, 0); | 1983 | atomic_set(&rdev->nr_pending, 0); |
1877 | atomic_set(&rdev->read_errors, 0); | 1984 | atomic_set(&rdev->read_errors, 0); |
1878 | atomic_set(&rdev->corrected_errors, 0); | 1985 | atomic_set(&rdev->corrected_errors, 0); |
@@ -1978,6 +2085,54 @@ static void analyze_sbs(mddev_t * mddev) | |||
1978 | } | 2085 | } |
1979 | 2086 | ||
1980 | static ssize_t | 2087 | static ssize_t |
2088 | safe_delay_show(mddev_t *mddev, char *page) | ||
2089 | { | ||
2090 | int msec = (mddev->safemode_delay*1000)/HZ; | ||
2091 | return sprintf(page, "%d.%03d\n", msec/1000, msec%1000); | ||
2092 | } | ||
2093 | static ssize_t | ||
2094 | safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) | ||
2095 | { | ||
2096 | int scale=1; | ||
2097 | int dot=0; | ||
2098 | int i; | ||
2099 | unsigned long msec; | ||
2100 | char buf[30]; | ||
2101 | char *e; | ||
2102 | /* remove a period, and count digits after it */ | ||
2103 | if (len >= sizeof(buf)) | ||
2104 | return -EINVAL; | ||
2105 | strlcpy(buf, cbuf, len); | ||
2106 | buf[len] = 0; | ||
2107 | for (i=0; i<len; i++) { | ||
2108 | if (dot) { | ||
2109 | if (isdigit(buf[i])) { | ||
2110 | buf[i-1] = buf[i]; | ||
2111 | scale *= 10; | ||
2112 | } | ||
2113 | buf[i] = 0; | ||
2114 | } else if (buf[i] == '.') { | ||
2115 | dot=1; | ||
2116 | buf[i] = 0; | ||
2117 | } | ||
2118 | } | ||
2119 | msec = simple_strtoul(buf, &e, 10); | ||
2120 | if (e == buf || (*e && *e != '\n')) | ||
2121 | return -EINVAL; | ||
2122 | msec = (msec * 1000) / scale; | ||
2123 | if (msec == 0) | ||
2124 | mddev->safemode_delay = 0; | ||
2125 | else { | ||
2126 | mddev->safemode_delay = (msec*HZ)/1000; | ||
2127 | if (mddev->safemode_delay == 0) | ||
2128 | mddev->safemode_delay = 1; | ||
2129 | } | ||
2130 | return len; | ||
2131 | } | ||
/* sysfs attribute "safe_mode_delay" (mode 0644): safe_delay_show() / safe_delay_store() */
static struct md_sysfs_entry md_safe_delay =
__ATTR(safe_mode_delay, 0644,safe_delay_show, safe_delay_store);
2134 | |||
2135 | static ssize_t | ||
1981 | level_show(mddev_t *mddev, char *page) | 2136 | level_show(mddev_t *mddev, char *page) |
1982 | { | 2137 | { |
1983 | struct mdk_personality *p = mddev->pers; | 2138 | struct mdk_personality *p = mddev->pers; |
@@ -2012,6 +2167,32 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
2012 | static struct md_sysfs_entry md_level = | 2167 | static struct md_sysfs_entry md_level = |
2013 | __ATTR(level, 0644, level_show, level_store); | 2168 | __ATTR(level, 0644, level_show, level_store); |
2014 | 2169 | ||
2170 | |||
2171 | static ssize_t | ||
2172 | layout_show(mddev_t *mddev, char *page) | ||
2173 | { | ||
2174 | /* just a number, not meaningful for all levels */ | ||
2175 | return sprintf(page, "%d\n", mddev->layout); | ||
2176 | } | ||
2177 | |||
2178 | static ssize_t | ||
2179 | layout_store(mddev_t *mddev, const char *buf, size_t len) | ||
2180 | { | ||
2181 | char *e; | ||
2182 | unsigned long n = simple_strtoul(buf, &e, 10); | ||
2183 | if (mddev->pers) | ||
2184 | return -EBUSY; | ||
2185 | |||
2186 | if (!*buf || (*e && *e != '\n')) | ||
2187 | return -EINVAL; | ||
2188 | |||
2189 | mddev->layout = n; | ||
2190 | return len; | ||
2191 | } | ||
2192 | static struct md_sysfs_entry md_layout = | ||
2193 | __ATTR(layout, 0655, layout_show, layout_store); | ||
2194 | |||
2195 | |||
2015 | static ssize_t | 2196 | static ssize_t |
2016 | raid_disks_show(mddev_t *mddev, char *page) | 2197 | raid_disks_show(mddev_t *mddev, char *page) |
2017 | { | 2198 | { |
@@ -2067,6 +2248,200 @@ static struct md_sysfs_entry md_chunk_size = | |||
2067 | __ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store); | 2248 | __ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store); |
2068 | 2249 | ||
2069 | static ssize_t | 2250 | static ssize_t |
2251 | resync_start_show(mddev_t *mddev, char *page) | ||
2252 | { | ||
2253 | return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp); | ||
2254 | } | ||
2255 | |||
2256 | static ssize_t | ||
2257 | resync_start_store(mddev_t *mddev, const char *buf, size_t len) | ||
2258 | { | ||
2259 | /* can only set chunk_size if array is not yet active */ | ||
2260 | char *e; | ||
2261 | unsigned long long n = simple_strtoull(buf, &e, 10); | ||
2262 | |||
2263 | if (mddev->pers) | ||
2264 | return -EBUSY; | ||
2265 | if (!*buf || (*e && *e != '\n')) | ||
2266 | return -EINVAL; | ||
2267 | |||
2268 | mddev->recovery_cp = n; | ||
2269 | return len; | ||
2270 | } | ||
/* sysfs attribute "resync_start" (mode 0644): resync_start_show() / resync_start_store() */
static struct md_sysfs_entry md_resync_start =
__ATTR(resync_start, 0644, resync_start_show, resync_start_store);
2273 | |||
2274 | /* | ||
2275 | * The array state can be: | ||
2276 | * | ||
2277 | * clear | ||
2278 | * No devices, no size, no level | ||
2279 | * Equivalent to STOP_ARRAY ioctl | ||
2280 | * inactive | ||
2281 | * May have some settings, but array is not active | ||
2282 | * all IO results in error | ||
2283 | * When written, doesn't tear down array, but just stops it | ||
2284 | * suspended (not supported yet) | ||
2285 | * All IO requests will block. The array can be reconfigured. | ||
2286 | * Writing this, if accepted, will block until array is quiescent | ||
2287 | * readonly | ||
2288 | * no resync can happen. no superblocks get written. | ||
2289 | * write requests fail | ||
2290 | * read-auto | ||
2291 | * like readonly, but behaves like 'clean' on a write request. | ||
2292 | * | ||
2293 | * clean - no pending writes, but otherwise active. | ||
2294 | * When written to inactive array, starts without resync | ||
2295 | * If a write request arrives then | ||
2296 | * if metadata is known, mark 'dirty' and switch to 'active'. | ||
2297 | * if not known, block and switch to write-pending | ||
2298 | * If written to an active array that has pending writes, then fails. | ||
2299 | * active | ||
2300 | * fully active: IO and resync can be happening. | ||
2301 | * When written to inactive array, starts with resync | ||
2302 | * | ||
2303 | * write-pending | ||
2304 | * clean, but writes are blocked waiting for 'active' to be written. | ||
2305 | * | ||
2306 | * active-idle | ||
2307 | * like active, but no writes have been seen for a while (100msec). | ||
2308 | * | ||
2309 | */ | ||
/*
 * Each enumerator is the index of its name in array_states[].
 * bad_word is the index of the terminating NULL, which is the value
 * match_word() returns when no name matches.
 */
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
		   write_pending, active_idle, bad_word};
/* State names as read from / written to sysfs, in enum array_state order */
static char *array_states[] = {
	"clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
	"write-pending", "active-idle", NULL };
2315 | |||
/*
 * Look @word up in the NULL-terminated string array @list using
 * cmd_match().  Returns the index of the first matching entry, or the
 * index of the terminating NULL when nothing matches.
 */
static int match_word(const char *word, char **list)
{
	int idx = 0;

	while (list[idx] && !cmd_match(word, list[idx]))
		idx++;
	return idx;
}
2324 | |||
2325 | static ssize_t | ||
2326 | array_state_show(mddev_t *mddev, char *page) | ||
2327 | { | ||
2328 | enum array_state st = inactive; | ||
2329 | |||
2330 | if (mddev->pers) | ||
2331 | switch(mddev->ro) { | ||
2332 | case 1: | ||
2333 | st = readonly; | ||
2334 | break; | ||
2335 | case 2: | ||
2336 | st = read_auto; | ||
2337 | break; | ||
2338 | case 0: | ||
2339 | if (mddev->in_sync) | ||
2340 | st = clean; | ||
2341 | else if (mddev->safemode) | ||
2342 | st = active_idle; | ||
2343 | else | ||
2344 | st = active; | ||
2345 | } | ||
2346 | else { | ||
2347 | if (list_empty(&mddev->disks) && | ||
2348 | mddev->raid_disks == 0 && | ||
2349 | mddev->size == 0) | ||
2350 | st = clear; | ||
2351 | else | ||
2352 | st = inactive; | ||
2353 | } | ||
2354 | return sprintf(page, "%s\n", array_states[st]); | ||
2355 | } | ||
2356 | |||
/* Forward declarations needed by array_state_store() below */
static int do_md_stop(mddev_t * mddev, int ro);
static int do_md_run(mddev_t * mddev);
static int restart_array(mddev_t *mddev);
2360 | |||
2361 | static ssize_t | ||
2362 | array_state_store(mddev_t *mddev, const char *buf, size_t len) | ||
2363 | { | ||
2364 | int err = -EINVAL; | ||
2365 | enum array_state st = match_word(buf, array_states); | ||
2366 | switch(st) { | ||
2367 | case bad_word: | ||
2368 | break; | ||
2369 | case clear: | ||
2370 | /* stopping an active array */ | ||
2371 | if (mddev->pers) { | ||
2372 | if (atomic_read(&mddev->active) > 1) | ||
2373 | return -EBUSY; | ||
2374 | err = do_md_stop(mddev, 0); | ||
2375 | } | ||
2376 | break; | ||
2377 | case inactive: | ||
2378 | /* stopping an active array */ | ||
2379 | if (mddev->pers) { | ||
2380 | if (atomic_read(&mddev->active) > 1) | ||
2381 | return -EBUSY; | ||
2382 | err = do_md_stop(mddev, 2); | ||
2383 | } | ||
2384 | break; | ||
2385 | case suspended: | ||
2386 | break; /* not supported yet */ | ||
2387 | case readonly: | ||
2388 | if (mddev->pers) | ||
2389 | err = do_md_stop(mddev, 1); | ||
2390 | else { | ||
2391 | mddev->ro = 1; | ||
2392 | err = do_md_run(mddev); | ||
2393 | } | ||
2394 | break; | ||
2395 | case read_auto: | ||
2396 | /* stopping an active array */ | ||
2397 | if (mddev->pers) { | ||
2398 | err = do_md_stop(mddev, 1); | ||
2399 | if (err == 0) | ||
2400 | mddev->ro = 2; /* FIXME mark devices writable */ | ||
2401 | } else { | ||
2402 | mddev->ro = 2; | ||
2403 | err = do_md_run(mddev); | ||
2404 | } | ||
2405 | break; | ||
2406 | case clean: | ||
2407 | if (mddev->pers) { | ||
2408 | restart_array(mddev); | ||
2409 | spin_lock_irq(&mddev->write_lock); | ||
2410 | if (atomic_read(&mddev->writes_pending) == 0) { | ||
2411 | mddev->in_sync = 1; | ||
2412 | mddev->sb_dirty = 1; | ||
2413 | } | ||
2414 | spin_unlock_irq(&mddev->write_lock); | ||
2415 | } else { | ||
2416 | mddev->ro = 0; | ||
2417 | mddev->recovery_cp = MaxSector; | ||
2418 | err = do_md_run(mddev); | ||
2419 | } | ||
2420 | break; | ||
2421 | case active: | ||
2422 | if (mddev->pers) { | ||
2423 | restart_array(mddev); | ||
2424 | mddev->sb_dirty = 0; | ||
2425 | wake_up(&mddev->sb_wait); | ||
2426 | err = 0; | ||
2427 | } else { | ||
2428 | mddev->ro = 0; | ||
2429 | err = do_md_run(mddev); | ||
2430 | } | ||
2431 | break; | ||
2432 | case write_pending: | ||
2433 | case active_idle: | ||
2434 | /* these cannot be set */ | ||
2435 | break; | ||
2436 | } | ||
2437 | if (err) | ||
2438 | return err; | ||
2439 | else | ||
2440 | return len; | ||
2441 | } | ||
/* sysfs attribute "array_state" (mode 0644): array_state_show() / array_state_store() */
static struct md_sysfs_entry md_array_state = __ATTR(array_state, 0644, array_state_show, array_state_store);
2443 | |||
2444 | static ssize_t | ||
2070 | null_show(mddev_t *mddev, char *page) | 2445 | null_show(mddev_t *mddev, char *page) |
2071 | { | 2446 | { |
2072 | return -EINVAL; | 2447 | return -EINVAL; |
@@ -2428,11 +2803,15 @@ __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); | |||
2428 | 2803 | ||
/* NULL-terminated list of the md sysfs attribute entries defined above */
static struct attribute *md_default_attrs[] = {
	&md_level.attr,
	&md_layout.attr,
	&md_raid_disks.attr,
	&md_chunk_size.attr,
	&md_size.attr,
	&md_resync_start.attr,
	&md_metadata.attr,
	&md_new_device.attr,
	&md_safe_delay.attr,
	&md_array_state.attr,
	NULL,
};
2438 | 2817 | ||
@@ -2553,8 +2932,6 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
2553 | return NULL; | 2932 | return NULL; |
2554 | } | 2933 | } |
2555 | 2934 | ||
2556 | void md_wakeup_thread(mdk_thread_t *thread); | ||
2557 | |||
2558 | static void md_safemode_timeout(unsigned long data) | 2935 | static void md_safemode_timeout(unsigned long data) |
2559 | { | 2936 | { |
2560 | mddev_t *mddev = (mddev_t *) data; | 2937 | mddev_t *mddev = (mddev_t *) data; |
@@ -2708,7 +3085,7 @@ static int do_md_run(mddev_t * mddev) | |||
2708 | mddev->safemode = 0; | 3085 | mddev->safemode = 0; |
2709 | mddev->safemode_timer.function = md_safemode_timeout; | 3086 | mddev->safemode_timer.function = md_safemode_timeout; |
2710 | mddev->safemode_timer.data = (unsigned long) mddev; | 3087 | mddev->safemode_timer.data = (unsigned long) mddev; |
2711 | mddev->safemode_delay = (20 * HZ)/1000 +1; /* 20 msec delay */ | 3088 | mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ |
2712 | mddev->in_sync = 1; | 3089 | mddev->in_sync = 1; |
2713 | 3090 | ||
2714 | ITERATE_RDEV(mddev,rdev,tmp) | 3091 | ITERATE_RDEV(mddev,rdev,tmp) |
@@ -2736,6 +3113,36 @@ static int do_md_run(mddev_t * mddev) | |||
2736 | mddev->queue->queuedata = mddev; | 3113 | mddev->queue->queuedata = mddev; |
2737 | mddev->queue->make_request_fn = mddev->pers->make_request; | 3114 | mddev->queue->make_request_fn = mddev->pers->make_request; |
2738 | 3115 | ||
3116 | /* If there is a partially-recovered drive we need to | ||
3117 | * start recovery here. If we leave it to md_check_recovery, | ||
3118 | * it will remove the drives and not do the right thing | ||
3119 | */ | ||
3120 | if (mddev->degraded) { | ||
3121 | struct list_head *rtmp; | ||
3122 | int spares = 0; | ||
3123 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
3124 | if (rdev->raid_disk >= 0 && | ||
3125 | !test_bit(In_sync, &rdev->flags) && | ||
3126 | !test_bit(Faulty, &rdev->flags)) | ||
3127 | /* complete an interrupted recovery */ | ||
3128 | spares++; | ||
3129 | if (spares && mddev->pers->sync_request) { | ||
3130 | mddev->recovery = 0; | ||
3131 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
3132 | mddev->sync_thread = md_register_thread(md_do_sync, | ||
3133 | mddev, | ||
3134 | "%s_resync"); | ||
3135 | if (!mddev->sync_thread) { | ||
3136 | printk(KERN_ERR "%s: could not start resync" | ||
3137 | " thread...\n", | ||
3138 | mdname(mddev)); | ||
3139 | /* leave the spares where they are, it shouldn't hurt */ | ||
3140 | mddev->recovery = 0; | ||
3141 | } else | ||
3142 | md_wakeup_thread(mddev->sync_thread); | ||
3143 | } | ||
3144 | } | ||
3145 | |||
2739 | mddev->changed = 1; | 3146 | mddev->changed = 1; |
2740 | md_new_event(mddev); | 3147 | md_new_event(mddev); |
2741 | return 0; | 3148 | return 0; |
@@ -2769,18 +3176,47 @@ static int restart_array(mddev_t *mddev) | |||
2769 | */ | 3176 | */ |
2770 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3177 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
2771 | md_wakeup_thread(mddev->thread); | 3178 | md_wakeup_thread(mddev->thread); |
3179 | md_wakeup_thread(mddev->sync_thread); | ||
2772 | err = 0; | 3180 | err = 0; |
2773 | } else { | 3181 | } else |
2774 | printk(KERN_ERR "md: %s has no personality assigned.\n", | ||
2775 | mdname(mddev)); | ||
2776 | err = -EINVAL; | 3182 | err = -EINVAL; |
2777 | } | ||
2778 | 3183 | ||
2779 | out: | 3184 | out: |
2780 | return err; | 3185 | return err; |
2781 | } | 3186 | } |
2782 | 3187 | ||
2783 | static int do_md_stop(mddev_t * mddev, int ro) | 3188 | /* similar to deny_write_access, but accounts for our holding a reference |
3189 | * to the file ourselves */ | ||
3190 | static int deny_bitmap_write_access(struct file * file) | ||
3191 | { | ||
3192 | struct inode *inode = file->f_mapping->host; | ||
3193 | |||
3194 | spin_lock(&inode->i_lock); | ||
3195 | if (atomic_read(&inode->i_writecount) > 1) { | ||
3196 | spin_unlock(&inode->i_lock); | ||
3197 | return -ETXTBSY; | ||
3198 | } | ||
3199 | atomic_set(&inode->i_writecount, -1); | ||
3200 | spin_unlock(&inode->i_lock); | ||
3201 | |||
3202 | return 0; | ||
3203 | } | ||
3204 | |||
3205 | static void restore_bitmap_write_access(struct file *file) | ||
3206 | { | ||
3207 | struct inode *inode = file->f_mapping->host; | ||
3208 | |||
3209 | spin_lock(&inode->i_lock); | ||
3210 | atomic_set(&inode->i_writecount, 1); | ||
3211 | spin_unlock(&inode->i_lock); | ||
3212 | } | ||
3213 | |||
3214 | /* mode: | ||
3215 | * 0 - completely stop and dis-assemble array | ||
3216 | * 1 - switch to readonly | ||
3217 | * 2 - stop but do not disassemble array | ||
3218 | */ | ||
3219 | static int do_md_stop(mddev_t * mddev, int mode) | ||
2784 | { | 3220 | { |
2785 | int err = 0; | 3221 | int err = 0; |
2786 | struct gendisk *disk = mddev->gendisk; | 3222 | struct gendisk *disk = mddev->gendisk; |
@@ -2792,6 +3228,7 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2792 | } | 3228 | } |
2793 | 3229 | ||
2794 | if (mddev->sync_thread) { | 3230 | if (mddev->sync_thread) { |
3231 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
2795 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 3232 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
2796 | md_unregister_thread(mddev->sync_thread); | 3233 | md_unregister_thread(mddev->sync_thread); |
2797 | mddev->sync_thread = NULL; | 3234 | mddev->sync_thread = NULL; |
@@ -2801,12 +3238,15 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2801 | 3238 | ||
2802 | invalidate_partition(disk, 0); | 3239 | invalidate_partition(disk, 0); |
2803 | 3240 | ||
2804 | if (ro) { | 3241 | switch(mode) { |
3242 | case 1: /* readonly */ | ||
2805 | err = -ENXIO; | 3243 | err = -ENXIO; |
2806 | if (mddev->ro==1) | 3244 | if (mddev->ro==1) |
2807 | goto out; | 3245 | goto out; |
2808 | mddev->ro = 1; | 3246 | mddev->ro = 1; |
2809 | } else { | 3247 | break; |
3248 | case 0: /* disassemble */ | ||
3249 | case 2: /* stop */ | ||
2810 | bitmap_flush(mddev); | 3250 | bitmap_flush(mddev); |
2811 | md_super_wait(mddev); | 3251 | md_super_wait(mddev); |
2812 | if (mddev->ro) | 3252 | if (mddev->ro) |
@@ -2821,19 +3261,20 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2821 | if (mddev->ro) | 3261 | if (mddev->ro) |
2822 | mddev->ro = 0; | 3262 | mddev->ro = 0; |
2823 | } | 3263 | } |
2824 | if (!mddev->in_sync) { | 3264 | if (!mddev->in_sync || mddev->sb_dirty) { |
2825 | /* mark array as shutdown cleanly */ | 3265 | /* mark array as shutdown cleanly */ |
2826 | mddev->in_sync = 1; | 3266 | mddev->in_sync = 1; |
2827 | md_update_sb(mddev); | 3267 | md_update_sb(mddev); |
2828 | } | 3268 | } |
2829 | if (ro) | 3269 | if (mode == 1) |
2830 | set_disk_ro(disk, 1); | 3270 | set_disk_ro(disk, 1); |
3271 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
2831 | } | 3272 | } |
2832 | 3273 | ||
2833 | /* | 3274 | /* |
2834 | * Free resources if final stop | 3275 | * Free resources if final stop |
2835 | */ | 3276 | */ |
2836 | if (!ro) { | 3277 | if (mode == 0) { |
2837 | mdk_rdev_t *rdev; | 3278 | mdk_rdev_t *rdev; |
2838 | struct list_head *tmp; | 3279 | struct list_head *tmp; |
2839 | struct gendisk *disk; | 3280 | struct gendisk *disk; |
@@ -2841,7 +3282,7 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2841 | 3282 | ||
2842 | bitmap_destroy(mddev); | 3283 | bitmap_destroy(mddev); |
2843 | if (mddev->bitmap_file) { | 3284 | if (mddev->bitmap_file) { |
2844 | atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1); | 3285 | restore_bitmap_write_access(mddev->bitmap_file); |
2845 | fput(mddev->bitmap_file); | 3286 | fput(mddev->bitmap_file); |
2846 | mddev->bitmap_file = NULL; | 3287 | mddev->bitmap_file = NULL; |
2847 | } | 3288 | } |
@@ -2857,11 +3298,15 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2857 | export_array(mddev); | 3298 | export_array(mddev); |
2858 | 3299 | ||
2859 | mddev->array_size = 0; | 3300 | mddev->array_size = 0; |
3301 | mddev->size = 0; | ||
3302 | mddev->raid_disks = 0; | ||
3303 | mddev->recovery_cp = 0; | ||
3304 | |||
2860 | disk = mddev->gendisk; | 3305 | disk = mddev->gendisk; |
2861 | if (disk) | 3306 | if (disk) |
2862 | set_capacity(disk, 0); | 3307 | set_capacity(disk, 0); |
2863 | mddev->changed = 1; | 3308 | mddev->changed = 1; |
2864 | } else | 3309 | } else if (mddev->pers) |
2865 | printk(KERN_INFO "md: %s switched to read-only mode.\n", | 3310 | printk(KERN_INFO "md: %s switched to read-only mode.\n", |
2866 | mdname(mddev)); | 3311 | mdname(mddev)); |
2867 | err = 0; | 3312 | err = 0; |
@@ -3264,6 +3709,17 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
3264 | 3709 | ||
3265 | rdev->raid_disk = -1; | 3710 | rdev->raid_disk = -1; |
3266 | err = bind_rdev_to_array(rdev, mddev); | 3711 | err = bind_rdev_to_array(rdev, mddev); |
3712 | if (!err && !mddev->pers->hot_remove_disk) { | ||
3713 | /* If there is hot_add_disk but no hot_remove_disk | ||
3714 | * then added disks are for geometry changes, | ||
3715 | * and should be added immediately. | ||
3716 | */ | ||
3717 | super_types[mddev->major_version]. | ||
3718 | validate_super(mddev, rdev); | ||
3719 | err = mddev->pers->hot_add_disk(mddev, rdev); | ||
3720 | if (err) | ||
3721 | unbind_rdev_from_array(rdev); | ||
3722 | } | ||
3267 | if (err) | 3723 | if (err) |
3268 | export_rdev(rdev); | 3724 | export_rdev(rdev); |
3269 | 3725 | ||
@@ -3434,23 +3890,6 @@ abort_export: | |||
3434 | return err; | 3890 | return err; |
3435 | } | 3891 | } |
3436 | 3892 | ||
3437 | /* similar to deny_write_access, but accounts for our holding a reference | ||
3438 | * to the file ourselves */ | ||
3439 | static int deny_bitmap_write_access(struct file * file) | ||
3440 | { | ||
3441 | struct inode *inode = file->f_mapping->host; | ||
3442 | |||
3443 | spin_lock(&inode->i_lock); | ||
3444 | if (atomic_read(&inode->i_writecount) > 1) { | ||
3445 | spin_unlock(&inode->i_lock); | ||
3446 | return -ETXTBSY; | ||
3447 | } | ||
3448 | atomic_set(&inode->i_writecount, -1); | ||
3449 | spin_unlock(&inode->i_lock); | ||
3450 | |||
3451 | return 0; | ||
3452 | } | ||
3453 | |||
3454 | static int set_bitmap_file(mddev_t *mddev, int fd) | 3893 | static int set_bitmap_file(mddev_t *mddev, int fd) |
3455 | { | 3894 | { |
3456 | int err; | 3895 | int err; |
@@ -3491,12 +3930,17 @@ static int set_bitmap_file(mddev_t *mddev, int fd) | |||
3491 | mddev->pers->quiesce(mddev, 1); | 3930 | mddev->pers->quiesce(mddev, 1); |
3492 | if (fd >= 0) | 3931 | if (fd >= 0) |
3493 | err = bitmap_create(mddev); | 3932 | err = bitmap_create(mddev); |
3494 | if (fd < 0 || err) | 3933 | if (fd < 0 || err) { |
3495 | bitmap_destroy(mddev); | 3934 | bitmap_destroy(mddev); |
3935 | fd = -1; /* make sure to put the file */ | ||
3936 | } | ||
3496 | mddev->pers->quiesce(mddev, 0); | 3937 | mddev->pers->quiesce(mddev, 0); |
3497 | } else if (fd < 0) { | 3938 | } |
3498 | if (mddev->bitmap_file) | 3939 | if (fd < 0) { |
3940 | if (mddev->bitmap_file) { | ||
3941 | restore_bitmap_write_access(mddev->bitmap_file); | ||
3499 | fput(mddev->bitmap_file); | 3942 | fput(mddev->bitmap_file); |
3943 | } | ||
3500 | mddev->bitmap_file = NULL; | 3944 | mddev->bitmap_file = NULL; |
3501 | } | 3945 | } |
3502 | 3946 | ||
@@ -3977,11 +4421,6 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
3977 | goto done_unlock; | 4421 | goto done_unlock; |
3978 | 4422 | ||
3979 | default: | 4423 | default: |
3980 | if (_IOC_TYPE(cmd) == MD_MAJOR) | ||
3981 | printk(KERN_WARNING "md: %s(pid %d) used" | ||
3982 | " obsolete MD ioctl, upgrade your" | ||
3983 | " software to use new ictls.\n", | ||
3984 | current->comm, current->pid); | ||
3985 | err = -EINVAL; | 4424 | err = -EINVAL; |
3986 | goto abort_unlock; | 4425 | goto abort_unlock; |
3987 | } | 4426 | } |
@@ -4586,7 +5025,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) | |||
4586 | spin_lock_irq(&mddev->write_lock); | 5025 | spin_lock_irq(&mddev->write_lock); |
4587 | if (mddev->in_sync) { | 5026 | if (mddev->in_sync) { |
4588 | mddev->in_sync = 0; | 5027 | mddev->in_sync = 0; |
4589 | mddev->sb_dirty = 1; | 5028 | mddev->sb_dirty = 3; |
4590 | md_wakeup_thread(mddev->thread); | 5029 | md_wakeup_thread(mddev->thread); |
4591 | } | 5030 | } |
4592 | spin_unlock_irq(&mddev->write_lock); | 5031 | spin_unlock_irq(&mddev->write_lock); |
@@ -4599,7 +5038,7 @@ void md_write_end(mddev_t *mddev) | |||
4599 | if (atomic_dec_and_test(&mddev->writes_pending)) { | 5038 | if (atomic_dec_and_test(&mddev->writes_pending)) { |
4600 | if (mddev->safemode == 2) | 5039 | if (mddev->safemode == 2) |
4601 | md_wakeup_thread(mddev->thread); | 5040 | md_wakeup_thread(mddev->thread); |
4602 | else | 5041 | else if (mddev->safemode_delay) |
4603 | mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay); | 5042 | mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay); |
4604 | } | 5043 | } |
4605 | } | 5044 | } |
@@ -4620,10 +5059,14 @@ void md_do_sync(mddev_t *mddev) | |||
4620 | struct list_head *tmp; | 5059 | struct list_head *tmp; |
4621 | sector_t last_check; | 5060 | sector_t last_check; |
4622 | int skipped = 0; | 5061 | int skipped = 0; |
5062 | struct list_head *rtmp; | ||
5063 | mdk_rdev_t *rdev; | ||
4623 | 5064 | ||
4624 | /* just in case thread restarts... */ | 5065 | /* just in case thread restarts... */ |
4625 | if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) | 5066 | if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) |
4626 | return; | 5067 | return; |
5068 | if (mddev->ro) /* never try to sync a read-only array */ | ||
5069 | return; | ||
4627 | 5070 | ||
4628 | /* we overload curr_resync somewhat here. | 5071 | /* we overload curr_resync somewhat here. |
4629 | * 0 == not engaged in resync at all | 5072 | * 0 == not engaged in resync at all |
@@ -4682,17 +5125,30 @@ void md_do_sync(mddev_t *mddev) | |||
4682 | } | 5125 | } |
4683 | } while (mddev->curr_resync < 2); | 5126 | } while (mddev->curr_resync < 2); |
4684 | 5127 | ||
5128 | j = 0; | ||
4685 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 5129 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
4686 | /* resync follows the size requested by the personality, | 5130 | /* resync follows the size requested by the personality, |
4687 | * which defaults to physical size, but can be virtual size | 5131 | * which defaults to physical size, but can be virtual size |
4688 | */ | 5132 | */ |
4689 | max_sectors = mddev->resync_max_sectors; | 5133 | max_sectors = mddev->resync_max_sectors; |
4690 | mddev->resync_mismatches = 0; | 5134 | mddev->resync_mismatches = 0; |
5135 | /* we don't use the checkpoint if there's a bitmap */ | ||
5136 | if (!mddev->bitmap && | ||
5137 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
5138 | j = mddev->recovery_cp; | ||
4691 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 5139 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
4692 | max_sectors = mddev->size << 1; | 5140 | max_sectors = mddev->size << 1; |
4693 | else | 5141 | else { |
4694 | /* recovery follows the physical size of devices */ | 5142 | /* recovery follows the physical size of devices */ |
4695 | max_sectors = mddev->size << 1; | 5143 | max_sectors = mddev->size << 1; |
5144 | j = MaxSector; | ||
5145 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
5146 | if (rdev->raid_disk >= 0 && | ||
5147 | !test_bit(Faulty, &rdev->flags) && | ||
5148 | !test_bit(In_sync, &rdev->flags) && | ||
5149 | rdev->recovery_offset < j) | ||
5150 | j = rdev->recovery_offset; | ||
5151 | } | ||
4696 | 5152 | ||
4697 | printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); | 5153 | printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); |
4698 | printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" | 5154 | printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" |
@@ -4702,12 +5158,7 @@ void md_do_sync(mddev_t *mddev) | |||
4702 | speed_max(mddev)); | 5158 | speed_max(mddev)); |
4703 | 5159 | ||
4704 | is_mddev_idle(mddev); /* this also initializes IO event counters */ | 5160 | is_mddev_idle(mddev); /* this also initializes IO event counters */ |
4705 | /* we don't use the checkpoint if there's a bitmap */ | 5161 | |
4706 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap | ||
4707 | && ! test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
4708 | j = mddev->recovery_cp; | ||
4709 | else | ||
4710 | j = 0; | ||
4711 | io_sectors = 0; | 5162 | io_sectors = 0; |
4712 | for (m = 0; m < SYNC_MARKS; m++) { | 5163 | for (m = 0; m < SYNC_MARKS; m++) { |
4713 | mark[m] = jiffies; | 5164 | mark[m] = jiffies; |
@@ -4828,15 +5279,28 @@ void md_do_sync(mddev_t *mddev) | |||
4828 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && | 5279 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && |
4829 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && | 5280 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && |
4830 | !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && | 5281 | !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && |
4831 | mddev->curr_resync > 2 && | 5282 | mddev->curr_resync > 2) { |
4832 | mddev->curr_resync >= mddev->recovery_cp) { | 5283 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
4833 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 5284 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
4834 | printk(KERN_INFO | 5285 | if (mddev->curr_resync >= mddev->recovery_cp) { |
4835 | "md: checkpointing recovery of %s.\n", | 5286 | printk(KERN_INFO |
4836 | mdname(mddev)); | 5287 | "md: checkpointing recovery of %s.\n", |
4837 | mddev->recovery_cp = mddev->curr_resync; | 5288 | mdname(mddev)); |
4838 | } else | 5289 | mddev->recovery_cp = mddev->curr_resync; |
4839 | mddev->recovery_cp = MaxSector; | 5290 | } |
5291 | } else | ||
5292 | mddev->recovery_cp = MaxSector; | ||
5293 | } else { | ||
5294 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | ||
5295 | mddev->curr_resync = MaxSector; | ||
5296 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
5297 | if (rdev->raid_disk >= 0 && | ||
5298 | !test_bit(Faulty, &rdev->flags) && | ||
5299 | !test_bit(In_sync, &rdev->flags) && | ||
5300 | rdev->recovery_offset < mddev->curr_resync) | ||
5301 | rdev->recovery_offset = mddev->curr_resync; | ||
5302 | mddev->sb_dirty = 1; | ||
5303 | } | ||
4840 | } | 5304 | } |
4841 | 5305 | ||
4842 | skip: | 5306 | skip: |
@@ -4908,7 +5372,7 @@ void md_check_recovery(mddev_t *mddev) | |||
4908 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | 5372 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && |
4909 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | 5373 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { |
4910 | mddev->in_sync = 1; | 5374 | mddev->in_sync = 1; |
4911 | mddev->sb_dirty = 1; | 5375 | mddev->sb_dirty = 3; |
4912 | } | 5376 | } |
4913 | if (mddev->safemode == 1) | 5377 | if (mddev->safemode == 1) |
4914 | mddev->safemode = 0; | 5378 | mddev->safemode = 0; |
@@ -4957,6 +5421,8 @@ void md_check_recovery(mddev_t *mddev) | |||
4957 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); | 5421 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); |
4958 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); | 5422 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); |
4959 | 5423 | ||
5424 | if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) | ||
5425 | goto unlock; | ||
4960 | /* no recovery is running. | 5426 | /* no recovery is running. |
4961 | * remove any failed drives, then | 5427 | * remove any failed drives, then |
4962 | * add spares if possible. | 5428 | * add spares if possible. |
@@ -4979,6 +5445,7 @@ void md_check_recovery(mddev_t *mddev) | |||
4979 | ITERATE_RDEV(mddev,rdev,rtmp) | 5445 | ITERATE_RDEV(mddev,rdev,rtmp) |
4980 | if (rdev->raid_disk < 0 | 5446 | if (rdev->raid_disk < 0 |
4981 | && !test_bit(Faulty, &rdev->flags)) { | 5447 | && !test_bit(Faulty, &rdev->flags)) { |
5448 | rdev->recovery_offset = 0; | ||
4982 | if (mddev->pers->hot_add_disk(mddev,rdev)) { | 5449 | if (mddev->pers->hot_add_disk(mddev,rdev)) { |
4983 | char nm[20]; | 5450 | char nm[20]; |
4984 | sprintf(nm, "rd%d", rdev->raid_disk); | 5451 | sprintf(nm, "rd%d", rdev->raid_disk); |
@@ -5216,7 +5683,6 @@ EXPORT_SYMBOL(md_write_end); | |||
5216 | EXPORT_SYMBOL(md_register_thread); | 5683 | EXPORT_SYMBOL(md_register_thread); |
5217 | EXPORT_SYMBOL(md_unregister_thread); | 5684 | EXPORT_SYMBOL(md_unregister_thread); |
5218 | EXPORT_SYMBOL(md_wakeup_thread); | 5685 | EXPORT_SYMBOL(md_wakeup_thread); |
5219 | EXPORT_SYMBOL(md_print_devices); | ||
5220 | EXPORT_SYMBOL(md_check_recovery); | 5686 | EXPORT_SYMBOL(md_check_recovery); |
5221 | MODULE_LICENSE("GPL"); | 5687 | MODULE_LICENSE("GPL"); |
5222 | MODULE_ALIAS("md"); | 5688 | MODULE_ALIAS("md"); |