diff options
-rw-r--r-- | Documentation/md.txt | 22 | ||||
-rw-r--r-- | drivers/md/md.c | 55 | ||||
-rw-r--r-- | drivers/md/md.h | 80 | ||||
-rw-r--r-- | include/linux/raid/md_p.h | 7 |
4 files changed, 125 insertions, 39 deletions
diff --git a/Documentation/md.txt b/Documentation/md.txt index fc94770f44ab..993fba37b7d1 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -357,14 +357,14 @@ Each directory contains: | |||
357 | written to, that device. | 357 | written to, that device. |
358 | 358 | ||
359 | state | 359 | state |
360 | A file recording the current state of the device in the array | 360 | A file recording the current state of the device in the array |
361 | which can be a comma separated list of | 361 | which can be a comma separated list of |
362 | faulty - device has been kicked from active use due to | 362 | faulty - device has been kicked from active use due to |
363 | a detected fault or it has unacknowledged bad | 363 | a detected fault, or it has unacknowledged bad |
364 | blocks | 364 | blocks |
365 | in_sync - device is a fully in-sync member of the array | 365 | in_sync - device is a fully in-sync member of the array |
366 | writemostly - device will only be subject to read | 366 | writemostly - device will only be subject to read |
367 | requests if there are no other options. | 367 | requests if there are no other options. |
368 | This applies only to raid1 arrays. | 368 | This applies only to raid1 arrays. |
369 | blocked - device has failed, and the failure hasn't been | 369 | blocked - device has failed, and the failure hasn't been |
370 | acknowledged yet by the metadata handler. | 370 | acknowledged yet by the metadata handler. |
@@ -374,6 +374,13 @@ Each directory contains: | |||
374 | This includes spares that are in the process | 374 | This includes spares that are in the process |
375 | of being recovered to | 375 | of being recovered to |
376 | write_error - device has seen a write error at some point. | 376 | write_error - device has seen a write error at some point. |
377 | want_replacement - device is (mostly) working but probably | ||
378 | should be replaced, either due to errors or | ||
379 | due to user request. | ||
380 | replacement - device is a replacement for another active | ||
381 | device with the same raid_disk. | ||
382 | |||
383 | |||
377 | This list may grow in future. | 384 | This list may grow in future. |
378 | This can be written to. | 385 | This can be written to. |
379 | Writing "faulty" simulates a failure on the device. | 386 | Writing "faulty" simulates a failure on the device. |
@@ -386,6 +393,13 @@ Each directory contains: | |||
386 | Writing "in_sync" sets the in_sync flag. | 393 | Writing "in_sync" sets the in_sync flag. |
387 | Writing "write_error" sets writeerrorseen flag. | 394 | Writing "write_error" sets writeerrorseen flag. |
388 | Writing "-write_error" clears writeerrorseen flag. | 395 | Writing "-write_error" clears writeerrorseen flag. |
396 | Writing "want_replacement" is allowed at any time except to a | ||
397 | replacement device or a spare. It sets the flag. | ||
398 | Writing "-want_replacement" is allowed at any time. It clears | ||
399 | the flag. | ||
400 | Writing "replacement" or "-replacement" is only allowed before | ||
401 | starting the array. It sets or clears the flag. | ||
402 | |||
389 | 403 | ||
390 | This file responds to select/poll. Any change to 'faulty' | 404 | This file responds to select/poll. Any change to 'faulty' |
391 | or 'blocked' causes an event. | 405 | or 'blocked' causes an event. |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 0e2288824938..be569eb41a93 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -1714,6 +1714,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
1714 | } | 1714 | } |
1715 | if (sb->devflags & WriteMostly1) | 1715 | if (sb->devflags & WriteMostly1) |
1716 | set_bit(WriteMostly, &rdev->flags); | 1716 | set_bit(WriteMostly, &rdev->flags); |
1717 | if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT) | ||
1718 | set_bit(Replacement, &rdev->flags); | ||
1717 | } else /* MULTIPATH are always insync */ | 1719 | } else /* MULTIPATH are always insync */ |
1718 | set_bit(In_sync, &rdev->flags); | 1720 | set_bit(In_sync, &rdev->flags); |
1719 | 1721 | ||
@@ -1767,6 +1769,9 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) | |||
1767 | sb->recovery_offset = | 1769 | sb->recovery_offset = |
1768 | cpu_to_le64(rdev->recovery_offset); | 1770 | cpu_to_le64(rdev->recovery_offset); |
1769 | } | 1771 | } |
1772 | if (test_bit(Replacement, &rdev->flags)) | ||
1773 | sb->feature_map |= | ||
1774 | cpu_to_le32(MD_FEATURE_REPLACEMENT); | ||
1770 | 1775 | ||
1771 | if (mddev->reshape_position != MaxSector) { | 1776 | if (mddev->reshape_position != MaxSector) { |
1772 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); | 1777 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); |
@@ -2560,6 +2565,15 @@ state_show(struct md_rdev *rdev, char *page) | |||
2560 | len += sprintf(page+len, "%swrite_error", sep); | 2565 | len += sprintf(page+len, "%swrite_error", sep); |
2561 | sep = ","; | 2566 | sep = ","; |
2562 | } | 2567 | } |
2568 | if (test_bit(WantReplacement, &rdev->flags)) { | ||
2569 | len += sprintf(page+len, "%swant_replacement", sep); | ||
2570 | sep = ","; | ||
2571 | } | ||
2572 | if (test_bit(Replacement, &rdev->flags)) { | ||
2573 | len += sprintf(page+len, "%sreplacement", sep); | ||
2574 | sep = ","; | ||
2575 | } | ||
2576 | |||
2563 | return len+sprintf(page+len, "\n"); | 2577 | return len+sprintf(page+len, "\n"); |
2564 | } | 2578 | } |
2565 | 2579 | ||
@@ -2628,6 +2642,42 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) | |||
2628 | } else if (cmd_match(buf, "-write_error")) { | 2642 | } else if (cmd_match(buf, "-write_error")) { |
2629 | clear_bit(WriteErrorSeen, &rdev->flags); | 2643 | clear_bit(WriteErrorSeen, &rdev->flags); |
2630 | err = 0; | 2644 | err = 0; |
2645 | } else if (cmd_match(buf, "want_replacement")) { | ||
2646 | /* Any non-spare device that is not a replacement can | ||
2647 | * become want_replacement at any time, but we then need to | ||
2648 | * check if recovery is needed. | ||
2649 | */ | ||
2650 | if (rdev->raid_disk >= 0 && | ||
2651 | !test_bit(Replacement, &rdev->flags)) | ||
2652 | set_bit(WantReplacement, &rdev->flags); | ||
2653 | set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); | ||
2654 | md_wakeup_thread(rdev->mddev->thread); | ||
2655 | err = 0; | ||
2656 | } else if (cmd_match(buf, "-want_replacement")) { | ||
2657 | /* Clearing 'want_replacement' is always allowed. | ||
2658 | * Once replacement starts it is too late though. | ||
2659 | */ | ||
2660 | err = 0; | ||
2661 | clear_bit(WantReplacement, &rdev->flags); | ||
2662 | } else if (cmd_match(buf, "replacement")) { | ||
2663 | /* Can only set a device as a replacement when array has not | ||
2664 | * yet been started. Once running, replacement is automatic | ||
2665 | * from spares, or by assigning 'slot'. | ||
2666 | */ | ||
2667 | if (rdev->mddev->pers) | ||
2668 | err = -EBUSY; | ||
2669 | else { | ||
2670 | set_bit(Replacement, &rdev->flags); | ||
2671 | err = 0; | ||
2672 | } | ||
2673 | } else if (cmd_match(buf, "-replacement")) { | ||
2674 | /* Similarly, can only clear Replacement before start */ | ||
2675 | if (rdev->mddev->pers) | ||
2676 | err = -EBUSY; | ||
2677 | else { | ||
2678 | clear_bit(Replacement, &rdev->flags); | ||
2679 | err = 0; | ||
2680 | } | ||
2631 | } | 2681 | } |
2632 | if (!err) | 2682 | if (!err) |
2633 | sysfs_notify_dirent_safe(rdev->sysfs_state); | 2683 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
@@ -6717,8 +6767,11 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
6717 | if (test_bit(Faulty, &rdev->flags)) { | 6767 | if (test_bit(Faulty, &rdev->flags)) { |
6718 | seq_printf(seq, "(F)"); | 6768 | seq_printf(seq, "(F)"); |
6719 | continue; | 6769 | continue; |
6720 | } else if (rdev->raid_disk < 0) | 6770 | } |
6771 | if (rdev->raid_disk < 0) | ||
6721 | seq_printf(seq, "(S)"); /* spare */ | 6772 | seq_printf(seq, "(S)"); /* spare */ |
6773 | if (test_bit(Replacement, &rdev->flags)) | ||
6774 | seq_printf(seq, "(R)"); | ||
6722 | sectors += rdev->sectors; | 6775 | sectors += rdev->sectors; |
6723 | } | 6776 | } |
6724 | 6777 | ||
diff --git a/drivers/md/md.h b/drivers/md/md.h index 670c10e6b484..44c63dfeeb2b 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -72,34 +72,7 @@ struct md_rdev { | |||
72 | * This reduces the burden of testing multiple flags in many cases | 72 | * This reduces the burden of testing multiple flags in many cases |
73 | */ | 73 | */ |
74 | 74 | ||
75 | unsigned long flags; | 75 | unsigned long flags; /* bit set of 'enum flag_bits' bits. */ |
76 | #define Faulty 1 /* device is known to have a fault */ | ||
77 | #define In_sync 2 /* device is in_sync with rest of array */ | ||
78 | #define WriteMostly 4 /* Avoid reading if at all possible */ | ||
79 | #define AutoDetected 7 /* added by auto-detect */ | ||
80 | #define Blocked 8 /* An error occurred but has not yet | ||
81 | * been acknowledged by the metadata | ||
82 | * handler, so don't allow writes | ||
83 | * until it is cleared */ | ||
84 | #define WriteErrorSeen 9 /* A write error has been seen on this | ||
85 | * device | ||
86 | */ | ||
87 | #define FaultRecorded 10 /* Intermediate state for clearing | ||
88 | * Blocked. The Fault is/will-be | ||
89 | * recorded in the metadata, but that | ||
90 | * metadata hasn't been stored safely | ||
91 | * on disk yet. | ||
92 | */ | ||
93 | #define BlockedBadBlocks 11 /* A writer is blocked because they | ||
94 | * found an unacknowledged bad-block. | ||
95 | * This can safely be cleared at any | ||
96 | * time, and the writer will re-check. | ||
97 | * It may be set at any time, and at | ||
98 | * worst the writer will timeout and | ||
99 | * re-check. So setting it as | ||
100 | * accurately as possible is good, but | ||
101 | * not absolutely critical. | ||
102 | */ | ||
103 | wait_queue_head_t blocked_wait; | 76 | wait_queue_head_t blocked_wait; |
104 | 77 | ||
105 | int desc_nr; /* descriptor index in the superblock */ | 78 | int desc_nr; /* descriptor index in the superblock */ |
@@ -152,6 +125,44 @@ struct md_rdev { | |||
152 | sector_t size; /* in sectors */ | 125 | sector_t size; /* in sectors */ |
153 | } badblocks; | 126 | } badblocks; |
154 | }; | 127 | }; |
128 | enum flag_bits { | ||
129 | Faulty, /* device is known to have a fault */ | ||
130 | In_sync, /* device is in_sync with rest of array */ | ||
131 | WriteMostly, /* Avoid reading if at all possible */ | ||
132 | AutoDetected, /* added by auto-detect */ | ||
133 | Blocked, /* An error occurred but has not yet | ||
134 | * been acknowledged by the metadata | ||
135 | * handler, so don't allow writes | ||
136 | * until it is cleared */ | ||
137 | WriteErrorSeen, /* A write error has been seen on this | ||
138 | * device | ||
139 | */ | ||
140 | FaultRecorded, /* Intermediate state for clearing | ||
141 | * Blocked. The Fault is/will-be | ||
142 | * recorded in the metadata, but that | ||
143 | * metadata hasn't been stored safely | ||
144 | * on disk yet. | ||
145 | */ | ||
146 | BlockedBadBlocks, /* A writer is blocked because they | ||
147 | * found an unacknowledged bad-block. | ||
148 | * This can safely be cleared at any | ||
149 | * time, and the writer will re-check. | ||
150 | * It may be set at any time, and at | ||
151 | * worst the writer will timeout and | ||
152 | * re-check. So setting it as | ||
153 | * accurately as possible is good, but | ||
154 | * not absolutely critical. | ||
155 | */ | ||
156 | WantReplacement, /* This device is a candidate to be | ||
157 | * hot-replaced, either because it has | ||
158 | * reported some faults, or because | ||
159 | * of explicit request. | ||
160 | */ | ||
161 | Replacement, /* This device is a replacement for | ||
162 | * a want_replacement device with same | ||
163 | * raid_disk number. | ||
164 | */ | ||
165 | }; | ||
155 | 166 | ||
156 | #define BB_LEN_MASK (0x00000000000001FFULL) | 167 | #define BB_LEN_MASK (0x00000000000001FFULL) |
157 | #define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL) | 168 | #define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL) |
@@ -482,15 +493,20 @@ static inline char * mdname (struct mddev * mddev) | |||
482 | static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) | 493 | static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) |
483 | { | 494 | { |
484 | char nm[20]; | 495 | char nm[20]; |
485 | sprintf(nm, "rd%d", rdev->raid_disk); | 496 | if (!test_bit(Replacement, &rdev->flags)) { |
486 | return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); | 497 | sprintf(nm, "rd%d", rdev->raid_disk); |
498 | return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); | ||
499 | } else | ||
500 | return 0; | ||
487 | } | 501 | } |
488 | 502 | ||
489 | static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) | 503 | static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) |
490 | { | 504 | { |
491 | char nm[20]; | 505 | char nm[20]; |
492 | sprintf(nm, "rd%d", rdev->raid_disk); | 506 | if (!test_bit(Replacement, &rdev->flags)) { |
493 | sysfs_remove_link(&mddev->kobj, nm); | 507 | sprintf(nm, "rd%d", rdev->raid_disk); |
508 | sysfs_remove_link(&mddev->kobj, nm); | ||
509 | } | ||
494 | } | 510 | } |
495 | 511 | ||
496 | /* | 512 | /* |
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 9e65d9e20662..6f6df86f1ae5 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h | |||
@@ -277,7 +277,10 @@ struct mdp_superblock_1 { | |||
277 | */ | 277 | */ |
278 | #define MD_FEATURE_RESHAPE_ACTIVE 4 | 278 | #define MD_FEATURE_RESHAPE_ACTIVE 4 |
279 | #define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */ | 279 | #define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */ |
280 | 280 | #define MD_FEATURE_REPLACEMENT 16 /* This device is replacing an | |
281 | #define MD_FEATURE_ALL (1|2|4|8) | 281 | * active device with same 'role'. |
282 | * 'recovery_offset' is also set. | ||
283 | */ | ||
284 | #define MD_FEATURE_ALL (1|2|4|8|16) | ||
282 | 285 | ||
283 | #endif | 286 | #endif |