aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author Dan Williams <dan.j.williams@intel.com> 2009-07-14 16:40:57 -0400
committer Dan Williams <dan.j.williams@intel.com> 2009-08-29 22:13:13 -0400
commit d82dfee0ad8f240fef1b28e2258891c07da57367 (patch)
tree 44431399bef701c52f413c364f80751c18ff1179 /drivers/md
parent a9b39a741a7e3b262b9f51fefb68e17b32756999 (diff)
md/raid6: asynchronous handle_parity_checks6
[ Based on an original patch by Yuri Tikhonov ]

Implement the state machine for handling the RAID-6 parity check and repair functionality. Note that the raid6 case does not need to check for new failures, like raid5, as it will always write back the correct disks. The raid5 case can be updated to check zero_sum_result to avoid getting confused by new failures rather than retrying the entire check operation.

Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/raid5.c 206
1 files changed, 139 insertions, 67 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 08f806379b07..3c31f7f8aa65 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2901,91 +2901,163 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2901 struct stripe_head_state *s, 2901 struct stripe_head_state *s,
2902 struct r6_state *r6s, int disks) 2902 struct r6_state *r6s, int disks)
2903{ 2903{
2904 int update_p = 0, update_q = 0;
2905 struct r5dev *dev;
2906 int pd_idx = sh->pd_idx; 2904 int pd_idx = sh->pd_idx;
2907 int qd_idx = sh->qd_idx; 2905 int qd_idx = sh->qd_idx;
2908 unsigned long cpu; 2906 struct r5dev *dev;
2909 struct page *tmp_page;
2910 2907
2911 set_bit(STRIPE_HANDLE, &sh->state); 2908 set_bit(STRIPE_HANDLE, &sh->state);
2912 2909
2913 BUG_ON(s->failed > 2); 2910 BUG_ON(s->failed > 2);
2914 BUG_ON(s->uptodate < disks); 2911
2915 /* Want to check and possibly repair P and Q. 2912 /* Want to check and possibly repair P and Q.
2916 * However there could be one 'failed' device, in which 2913 * However there could be one 'failed' device, in which
2917 * case we can only check one of them, possibly using the 2914 * case we can only check one of them, possibly using the
2918 * other to generate missing data 2915 * other to generate missing data
2919 */ 2916 */
2920 cpu = get_cpu(); 2917
2921 tmp_page = per_cpu_ptr(conf->percpu, cpu)->spare_page; 2918 switch (sh->check_state) {
2922 if (s->failed == r6s->q_failed) { 2919 case check_state_idle:
2923 /* The only possible failed device holds 'Q', so it 2920 /* start a new check operation if there are < 2 failures */
2924 * makes sense to check P (If anything else were failed, 2921 if (s->failed == r6s->q_failed) {
2925 * we would have used P to recreate it). 2922 /* The only possible failed device holds Q, so it
2926 */ 2923 * makes sense to check P (If anything else were failed,
2927 compute_block_1(sh, pd_idx, 1); 2924 * we would have used P to recreate it).
2928 if (!page_is_zero(sh->dev[pd_idx].page)) { 2925 */
2929 compute_block_1(sh, pd_idx, 0); 2926 sh->check_state = check_state_run;
2930 update_p = 1;
2931 } 2927 }
2932 } 2928 if (!r6s->q_failed && s->failed < 2) {
2933 if (!r6s->q_failed && s->failed < 2) { 2929 /* Q is not failed, and we didn't use it to generate
2934 /* q is not failed, and we didn't use it to generate 2930 * anything, so it makes sense to check it
2935 * anything, so it makes sense to check it 2931 */
2936 */ 2932 if (sh->check_state == check_state_run)
2937 memcpy(page_address(tmp_page), 2933 sh->check_state = check_state_run_pq;
2938 page_address(sh->dev[qd_idx].page), 2934 else
2939 STRIPE_SIZE); 2935 sh->check_state = check_state_run_q;
2940 compute_parity6(sh, UPDATE_PARITY);
2941 if (memcmp(page_address(tmp_page),
2942 page_address(sh->dev[qd_idx].page),
2943 STRIPE_SIZE) != 0) {
2944 clear_bit(STRIPE_INSYNC, &sh->state);
2945 update_q = 1;
2946 } 2936 }
2947 }
2948 put_cpu();
2949 2937
2950 if (update_p || update_q) { 2938 /* discard potentially stale zero_sum_result */
2951 conf->mddev->resync_mismatches += STRIPE_SECTORS; 2939 sh->ops.zero_sum_result = 0;
2952 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
2953 /* don't try to repair!! */
2954 update_p = update_q = 0;
2955 }
2956 2940
2957 /* now write out any block on a failed drive, 2941 if (sh->check_state == check_state_run) {
2958 * or P or Q if they need it 2942 /* async_xor_zero_sum destroys the contents of P */
2959 */ 2943 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
2944 s->uptodate--;
2945 }
2946 if (sh->check_state >= check_state_run &&
2947 sh->check_state <= check_state_run_pq) {
2948 /* async_syndrome_zero_sum preserves P and Q, so
2949 * no need to mark them !uptodate here
2950 */
2951 set_bit(STRIPE_OP_CHECK, &s->ops_request);
2952 break;
2953 }
2960 2954
2961 if (s->failed == 2) { 2955 /* we have 2-disk failure */
2962 dev = &sh->dev[r6s->failed_num[1]]; 2956 BUG_ON(s->failed != 2);
2963 s->locked++; 2957 /* fall through */
2964 set_bit(R5_LOCKED, &dev->flags); 2958 case check_state_compute_result:
2965 set_bit(R5_Wantwrite, &dev->flags); 2959 sh->check_state = check_state_idle;
2966 }
2967 if (s->failed >= 1) {
2968 dev = &sh->dev[r6s->failed_num[0]];
2969 s->locked++;
2970 set_bit(R5_LOCKED, &dev->flags);
2971 set_bit(R5_Wantwrite, &dev->flags);
2972 }
2973 2960
2974 if (update_p) { 2961 /* check that a write has not made the stripe insync */
2975 dev = &sh->dev[pd_idx]; 2962 if (test_bit(STRIPE_INSYNC, &sh->state))
2976 s->locked++; 2963 break;
2977 set_bit(R5_LOCKED, &dev->flags);
2978 set_bit(R5_Wantwrite, &dev->flags);
2979 }
2980 if (update_q) {
2981 dev = &sh->dev[qd_idx];
2982 s->locked++;
2983 set_bit(R5_LOCKED, &dev->flags);
2984 set_bit(R5_Wantwrite, &dev->flags);
2985 }
2986 clear_bit(STRIPE_DEGRADED, &sh->state);
2987 2964
2988 set_bit(STRIPE_INSYNC, &sh->state); 2965 /* now write out any block on a failed drive,
2966 * or P or Q if they were recomputed
2967 */
2968 BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
2969 if (s->failed == 2) {
2970 dev = &sh->dev[r6s->failed_num[1]];
2971 s->locked++;
2972 set_bit(R5_LOCKED, &dev->flags);
2973 set_bit(R5_Wantwrite, &dev->flags);
2974 }
2975 if (s->failed >= 1) {
2976 dev = &sh->dev[r6s->failed_num[0]];
2977 s->locked++;
2978 set_bit(R5_LOCKED, &dev->flags);
2979 set_bit(R5_Wantwrite, &dev->flags);
2980 }
2981 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
2982 dev = &sh->dev[pd_idx];
2983 s->locked++;
2984 set_bit(R5_LOCKED, &dev->flags);
2985 set_bit(R5_Wantwrite, &dev->flags);
2986 }
2987 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
2988 dev = &sh->dev[qd_idx];
2989 s->locked++;
2990 set_bit(R5_LOCKED, &dev->flags);
2991 set_bit(R5_Wantwrite, &dev->flags);
2992 }
2993 clear_bit(STRIPE_DEGRADED, &sh->state);
2994
2995 set_bit(STRIPE_INSYNC, &sh->state);
2996 break;
2997 case check_state_run:
2998 case check_state_run_q:
2999 case check_state_run_pq:
3000 break; /* we will be called again upon completion */
3001 case check_state_check_result:
3002 sh->check_state = check_state_idle;
3003
3004 /* handle a successful check operation, if parity is correct
3005 * we are done. Otherwise update the mismatch count and repair
3006 * parity if !MD_RECOVERY_CHECK
3007 */
3008 if (sh->ops.zero_sum_result == 0) {
3009 /* both parities are correct */
3010 if (!s->failed)
3011 set_bit(STRIPE_INSYNC, &sh->state);
3012 else {
3013 /* in contrast to the raid5 case we can validate
3014 * parity, but still have a failure to write
3015 * back
3016 */
3017 sh->check_state = check_state_compute_result;
3018 /* Returning at this point means that we may go
3019 * off and bring p and/or q uptodate again so
3020 * we make sure to check zero_sum_result again
3021 * to verify if p or q need writeback
3022 */
3023 }
3024 } else {
3025 conf->mddev->resync_mismatches += STRIPE_SECTORS;
3026 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
3027 /* don't try to repair!! */
3028 set_bit(STRIPE_INSYNC, &sh->state);
3029 else {
3030 int *target = &sh->ops.target;
3031
3032 sh->ops.target = -1;
3033 sh->ops.target2 = -1;
3034 sh->check_state = check_state_compute_run;
3035 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
3036 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
3037 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
3038 set_bit(R5_Wantcompute,
3039 &sh->dev[pd_idx].flags);
3040 *target = pd_idx;
3041 target = &sh->ops.target2;
3042 s->uptodate++;
3043 }
3044 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
3045 set_bit(R5_Wantcompute,
3046 &sh->dev[qd_idx].flags);
3047 *target = qd_idx;
3048 s->uptodate++;
3049 }
3050 }
3051 }
3052 break;
3053 case check_state_compute_run:
3054 break;
3055 default:
3056 printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
3057 __func__, sh->check_state,
3058 (unsigned long long) sh->sector);
3059 BUG();
3060 }
2989} 3061}
2990 3062
2991static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, 3063static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,