author		Dan Williams <dan.j.williams@intel.com>	2009-07-14 16:40:57 -0400
committer	Dan Williams <dan.j.williams@intel.com>	2009-08-29 22:13:13 -0400
commit		d82dfee0ad8f240fef1b28e2258891c07da57367 (patch)
tree		44431399bef701c52f413c364f80751c18ff1179 /drivers/md
parent		a9b39a741a7e3b262b9f51fefb68e17b32756999 (diff)
md/raid6: asynchronous handle_parity_check6
[ Based on an original patch by Yuri Tikhonov ]
Implement the state machine for handling the RAID-6 parity check and
repair functionality. Note that, unlike raid5, the raid6 case does not
need to re-check for new failures, as it always writes back the correct
disks. The raid5 case could be updated to check zero_sum_result so that
new failures do not confuse it into retrying the entire check operation.
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
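
To make the new flow easier to follow, here is a minimal, self-contained sketch (not kernel code) of how the check_state_idle case in the diff below decides which zero-sum check to start. The enum and the helper name pick_check_state() are hypothetical illustrations; the kernel sets the check_state_* values on sh->check_state directly.

enum check_sketch { sketch_idle, sketch_run, sketch_run_q, sketch_run_pq };

/* Mirror of the decision made in check_state_idle:
 * - failed == q_failed means at most Q is missing, so P can be checked
 * - if Q is present and was not used for recovery, check Q as well
 * - remaining in sketch_idle implies a 2-disk failure (no check possible)
 */
static enum check_sketch pick_check_state(int failed, int q_failed)
{
	enum check_sketch state = sketch_idle;

	if (failed == q_failed)
		state = sketch_run;
	if (!q_failed && failed < 2)
		state = (state == sketch_run) ? sketch_run_pq : sketch_run_q;

	return state;
}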
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/raid5.c	206
1 file changed, 139 insertions(+), 67 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 08f806379b07..3c31f7f8aa65 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2901,91 +2901,163 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 				struct stripe_head_state *s,
 				struct r6_state *r6s, int disks)
 {
-	int update_p = 0, update_q = 0;
-	struct r5dev *dev;
 	int pd_idx = sh->pd_idx;
 	int qd_idx = sh->qd_idx;
-	unsigned long cpu;
-	struct page *tmp_page;
+	struct r5dev *dev;
 
 	set_bit(STRIPE_HANDLE, &sh->state);
 
 	BUG_ON(s->failed > 2);
-	BUG_ON(s->uptodate < disks);
+
 	/* Want to check and possibly repair P and Q.
 	 * However there could be one 'failed' device, in which
 	 * case we can only check one of them, possibly using the
 	 * other to generate missing data
 	 */
-	cpu = get_cpu();
-	tmp_page = per_cpu_ptr(conf->percpu, cpu)->spare_page;
-	if (s->failed == r6s->q_failed) {
-		/* The only possible failed device holds 'Q', so it
-		 * makes sense to check P (If anything else were failed,
-		 * we would have used P to recreate it).
-		 */
-		compute_block_1(sh, pd_idx, 1);
-		if (!page_is_zero(sh->dev[pd_idx].page)) {
-			compute_block_1(sh, pd_idx, 0);
-			update_p = 1;
+
+	switch (sh->check_state) {
+	case check_state_idle:
+		/* start a new check operation if there are < 2 failures */
+		if (s->failed == r6s->q_failed) {
+			/* The only possible failed device holds Q, so it
+			 * makes sense to check P (If anything else were failed,
+			 * we would have used P to recreate it).
+			 */
+			sh->check_state = check_state_run;
 		}
-	}
-	if (!r6s->q_failed && s->failed < 2) {
-		/* q is not failed, and we didn't use it to generate
-		 * anything, so it makes sense to check it
-		 */
-		memcpy(page_address(tmp_page),
-		       page_address(sh->dev[qd_idx].page),
-		       STRIPE_SIZE);
-		compute_parity6(sh, UPDATE_PARITY);
-		if (memcmp(page_address(tmp_page),
-			   page_address(sh->dev[qd_idx].page),
-			   STRIPE_SIZE) != 0) {
-			clear_bit(STRIPE_INSYNC, &sh->state);
-			update_q = 1;
+		if (!r6s->q_failed && s->failed < 2) {
+			/* Q is not failed, and we didn't use it to generate
+			 * anything, so it makes sense to check it
+			 */
+			if (sh->check_state == check_state_run)
+				sh->check_state = check_state_run_pq;
+			else
+				sh->check_state = check_state_run_q;
 		}
-	}
-	put_cpu();
 
-	if (update_p || update_q) {
-		conf->mddev->resync_mismatches += STRIPE_SECTORS;
-		if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
-			/* don't try to repair!! */
-			update_p = update_q = 0;
-	}
+		/* discard potentially stale zero_sum_result */
+		sh->ops.zero_sum_result = 0;
 
-	/* now write out any block on a failed drive,
-	 * or P or Q if they need it
-	 */
+		if (sh->check_state == check_state_run) {
+			/* async_xor_zero_sum destroys the contents of P */
+			clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+			s->uptodate--;
+		}
+		if (sh->check_state >= check_state_run &&
+		    sh->check_state <= check_state_run_pq) {
+			/* async_syndrome_zero_sum preserves P and Q, so
+			 * no need to mark them !uptodate here
+			 */
+			set_bit(STRIPE_OP_CHECK, &s->ops_request);
+			break;
+		}
 
-	if (s->failed == 2) {
-		dev = &sh->dev[r6s->failed_num[1]];
-		s->locked++;
-		set_bit(R5_LOCKED, &dev->flags);
-		set_bit(R5_Wantwrite, &dev->flags);
-	}
-	if (s->failed >= 1) {
-		dev = &sh->dev[r6s->failed_num[0]];
-		s->locked++;
-		set_bit(R5_LOCKED, &dev->flags);
-		set_bit(R5_Wantwrite, &dev->flags);
-	}
+		/* we have 2-disk failure */
+		BUG_ON(s->failed != 2);
+		/* fall through */
+	case check_state_compute_result:
+		sh->check_state = check_state_idle;
 
-	if (update_p) {
-		dev = &sh->dev[pd_idx];
-		s->locked++;
-		set_bit(R5_LOCKED, &dev->flags);
-		set_bit(R5_Wantwrite, &dev->flags);
-	}
-	if (update_q) {
-		dev = &sh->dev[qd_idx];
-		s->locked++;
-		set_bit(R5_LOCKED, &dev->flags);
-		set_bit(R5_Wantwrite, &dev->flags);
-	}
-	clear_bit(STRIPE_DEGRADED, &sh->state);
+		/* check that a write has not made the stripe insync */
+		if (test_bit(STRIPE_INSYNC, &sh->state))
+			break;
 
-	set_bit(STRIPE_INSYNC, &sh->state);
+		/* now write out any block on a failed drive,
+		 * or P or Q if they were recomputed
+		 */
+		BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
+		if (s->failed == 2) {
+			dev = &sh->dev[r6s->failed_num[1]];
+			s->locked++;
+			set_bit(R5_LOCKED, &dev->flags);
+			set_bit(R5_Wantwrite, &dev->flags);
+		}
+		if (s->failed >= 1) {
+			dev = &sh->dev[r6s->failed_num[0]];
+			s->locked++;
+			set_bit(R5_LOCKED, &dev->flags);
+			set_bit(R5_Wantwrite, &dev->flags);
+		}
+		if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+			dev = &sh->dev[pd_idx];
+			s->locked++;
+			set_bit(R5_LOCKED, &dev->flags);
+			set_bit(R5_Wantwrite, &dev->flags);
+		}
+		if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+			dev = &sh->dev[qd_idx];
+			s->locked++;
+			set_bit(R5_LOCKED, &dev->flags);
+			set_bit(R5_Wantwrite, &dev->flags);
+		}
+		clear_bit(STRIPE_DEGRADED, &sh->state);
+
+		set_bit(STRIPE_INSYNC, &sh->state);
+		break;
+	case check_state_run:
+	case check_state_run_q:
+	case check_state_run_pq:
+		break; /* we will be called again upon completion */
+	case check_state_check_result:
+		sh->check_state = check_state_idle;
+
+		/* handle a successful check operation, if parity is correct
+		 * we are done. Otherwise update the mismatch count and repair
+		 * parity if !MD_RECOVERY_CHECK
+		 */
+		if (sh->ops.zero_sum_result == 0) {
+			/* both parities are correct */
+			if (!s->failed)
+				set_bit(STRIPE_INSYNC, &sh->state);
+			else {
+				/* in contrast to the raid5 case we can validate
+				 * parity, but still have a failure to write
+				 * back
+				 */
+				sh->check_state = check_state_compute_result;
+				/* Returning at this point means that we may go
+				 * off and bring p and/or q uptodate again so
+				 * we make sure to check zero_sum_result again
+				 * to verify if p or q need writeback
+				 */
+			}
+		} else {
+			conf->mddev->resync_mismatches += STRIPE_SECTORS;
+			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+				/* don't try to repair!! */
+				set_bit(STRIPE_INSYNC, &sh->state);
+			else {
+				int *target = &sh->ops.target;
+
+				sh->ops.target = -1;
+				sh->ops.target2 = -1;
+				sh->check_state = check_state_compute_run;
+				set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+				set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+				if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+					set_bit(R5_Wantcompute,
+						&sh->dev[pd_idx].flags);
+					*target = pd_idx;
+					target = &sh->ops.target2;
+					s->uptodate++;
+				}
+				if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+					set_bit(R5_Wantcompute,
+						&sh->dev[qd_idx].flags);
+					*target = qd_idx;
+					s->uptodate++;
+				}
+			}
+		}
+		break;
+	case check_state_compute_run:
+		break;
+	default:
+		printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
+		       __func__, sh->check_state,
+		       (unsigned long long) sh->sector);
+		BUG();
+	}
 }
 
 static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
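
For reference, a minimal sketch (again not kernel code) of how the check_state_check_result case above queues repairs when the zero-sum check reports a mismatch: whichever of P and Q failed the check becomes a recompute target. The struct and helper name are hypothetical; the flag parameters stand in for SUM_CHECK_P_RESULT and SUM_CHECK_Q_RESULT.

struct repair_targets {
	int target;	/* first block to recompute, or -1 */
	int target2;	/* second block to recompute, or -1 */
};

static struct repair_targets pick_repair_targets(unsigned long zero_sum_result,
						 unsigned long p_result_flag,
						 unsigned long q_result_flag,
						 int pd_idx, int qd_idx)
{
	struct repair_targets t = { -1, -1 };
	int *slot = &t.target;

	if (zero_sum_result & p_result_flag) {	/* P mismatched: recompute P */
		*slot = pd_idx;
		slot = &t.target2;
	}
	if (zero_sum_result & q_result_flag)	/* Q mismatched: recompute Q */
		*slot = qd_idx;

	return t;
}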