about summary refs log tree commit diff stats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- drivers/md/raid5.c 206
1 files changed, 139 insertions, 67 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 08f806379b07..3c31f7f8aa65 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2901,91 +2901,163 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2901 struct stripe_head_state *s, 2901 struct stripe_head_state *s,
2902 struct r6_state *r6s, int disks) 2902 struct r6_state *r6s, int disks)
2903{ 2903{
2904 int update_p = 0, update_q = 0;
2905 struct r5dev *dev;
2906 int pd_idx = sh->pd_idx; 2904 int pd_idx = sh->pd_idx;
2907 int qd_idx = sh->qd_idx; 2905 int qd_idx = sh->qd_idx;
2908 unsigned long cpu; 2906 struct r5dev *dev;
2909 struct page *tmp_page;
2910 2907
2911 set_bit(STRIPE_HANDLE, &sh->state); 2908 set_bit(STRIPE_HANDLE, &sh->state);
2912 2909
2913 BUG_ON(s->failed > 2); 2910 BUG_ON(s->failed > 2);
2914 BUG_ON(s->uptodate < disks); 2911
2915 /* Want to check and possibly repair P and Q. 2912 /* Want to check and possibly repair P and Q.
2916 * However there could be one 'failed' device, in which 2913 * However there could be one 'failed' device, in which
2917 * case we can only check one of them, possibly using the 2914 * case we can only check one of them, possibly using the
2918 * other to generate missing data 2915 * other to generate missing data
2919 */ 2916 */
2920 cpu = get_cpu(); 2917
2921 tmp_page = per_cpu_ptr(conf->percpu, cpu)->spare_page; 2918 switch (sh->check_state) {
2922 if (s->failed == r6s->q_failed) { 2919 case check_state_idle:
2923 /* The only possible failed device holds 'Q', so it 2920 /* start a new check operation if there are < 2 failures */
2924 * makes sense to check P (If anything else were failed, 2921 if (s->failed == r6s->q_failed) {
2925 * we would have used P to recreate it). 2922 /* The only possible failed device holds Q, so it
2926 */ 2923 * makes sense to check P (If anything else were failed,
2927 compute_block_1(sh, pd_idx, 1); 2924 * we would have used P to recreate it).
2928 if (!page_is_zero(sh->dev[pd_idx].page)) { 2925 */
2929 compute_block_1(sh, pd_idx, 0); 2926 sh->check_state = check_state_run;
2930 update_p = 1;
2931 } 2927 }
2932 } 2928 if (!r6s->q_failed && s->failed < 2) {
2933 if (!r6s->q_failed && s->failed < 2) { 2929 /* Q is not failed, and we didn't use it to generate
2934 /* q is not failed, and we didn't use it to generate 2930 * anything, so it makes sense to check it
2935 * anything, so it makes sense to check it 2931 */
2936 */ 2932 if (sh->check_state == check_state_run)
2937 memcpy(page_address(tmp_page), 2933 sh->check_state = check_state_run_pq;
2938 page_address(sh->dev[qd_idx].page), 2934 else
2939 STRIPE_SIZE); 2935 sh->check_state = check_state_run_q;
2940 compute_parity6(sh, UPDATE_PARITY);
2941 if (memcmp(page_address(tmp_page),
2942 page_address(sh->dev[qd_idx].page),
2943 STRIPE_SIZE) != 0) {
2944 clear_bit(STRIPE_INSYNC, &sh->state);
2945 update_q = 1;
2946 } 2936 }
2947 }
2948 put_cpu();
2949 2937
2950 if (update_p || update_q) { 2938 /* discard potentially stale zero_sum_result */
2951 conf->mddev->resync_mismatches += STRIPE_SECTORS; 2939 sh->ops.zero_sum_result = 0;
2952 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
2953 /* don't try to repair!! */
2954 update_p = update_q = 0;
2955 }
2956 2940
2957 /* now write out any block on a failed drive, 2941 if (sh->check_state == check_state_run) {
2958 * or P or Q if they need it 2942 /* async_xor_zero_sum destroys the contents of P */
2959 */ 2943 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
2944 s->uptodate--;
2945 }
2946 if (sh->check_state >= check_state_run &&
2947 sh->check_state <= check_state_run_pq) {
2948 /* async_syndrome_zero_sum preserves P and Q, so
2949 * no need to mark them !uptodate here
2950 */
2951 set_bit(STRIPE_OP_CHECK, &s->ops_request);
2952 break;
2953 }
2960 2954
2961 if (s->failed == 2) { 2955 /* we have 2-disk failure */
2962 dev = &sh->dev[r6s->failed_num[1]]; 2956 BUG_ON(s->failed != 2);
2963 s->locked++; 2957 /* fall through */
2964 set_bit(R5_LOCKED, &dev->flags); 2958 case check_state_compute_result:
2965 set_bit(R5_Wantwrite, &dev->flags); 2959 sh->check_state = check_state_idle;
2966 }
2967 if (s->failed >= 1) {
2968 dev = &sh->dev[r6s->failed_num[0]];
2969 s->locked++;
2970 set_bit(R5_LOCKED, &dev->flags);
2971 set_bit(R5_Wantwrite, &dev->flags);
2972 }
2973 2960
2974 if (update_p) { 2961 /* check that a write has not made the stripe insync */
2975 dev = &sh->dev[pd_idx]; 2962 if (test_bit(STRIPE_INSYNC, &sh->state))
2976 s->locked++; 2963 break;
2977 set_bit(R5_LOCKED, &dev->flags);
2978 set_bit(R5_Wantwrite, &dev->flags);
2979 }
2980 if (update_q) {
2981 dev = &sh->dev[qd_idx];
2982 s->locked++;
2983 set_bit(R5_LOCKED, &dev->flags);
2984 set_bit(R5_Wantwrite, &dev->flags);
2985 }
2986 clear_bit(STRIPE_DEGRADED, &sh->state);
2987 2964
2988 set_bit(STRIPE_INSYNC, &sh->state); 2965 /* now write out any block on a failed drive,
2966 * or P or Q if they were recomputed
2967 */
2968 BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
2969 if (s->failed == 2) {
2970 dev = &sh->dev[r6s->failed_num[1]];
2971 s->locked++;
2972 set_bit(R5_LOCKED, &dev->flags);
2973 set_bit(R5_Wantwrite, &dev->flags);
2974 }
2975 if (s->failed >= 1) {
2976 dev = &sh->dev[r6s->failed_num[0]];
2977 s->locked++;
2978 set_bit(R5_LOCKED, &dev->flags);
2979 set_bit(R5_Wantwrite, &dev->flags);
2980 }
2981 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
2982 dev = &sh->dev[pd_idx];
2983 s->locked++;
2984 set_bit(R5_LOCKED, &dev->flags);
2985 set_bit(R5_Wantwrite, &dev->flags);
2986 }
2987 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
2988 dev = &sh->dev[qd_idx];
2989 s->locked++;
2990 set_bit(R5_LOCKED, &dev->flags);
2991 set_bit(R5_Wantwrite, &dev->flags);
2992 }
2993 clear_bit(STRIPE_DEGRADED, &sh->state);
2994
2995 set_bit(STRIPE_INSYNC, &sh->state);
2996 break;
2997 case check_state_run:
2998 case check_state_run_q:
2999 case check_state_run_pq:
3000 break; /* we will be called again upon completion */
3001 case check_state_check_result:
3002 sh->check_state = check_state_idle;
3003
3004 /* handle a successful check operation, if parity is correct
3005 * we are done. Otherwise update the mismatch count and repair
3006 * parity if !MD_RECOVERY_CHECK
3007 */
3008 if (sh->ops.zero_sum_result == 0) {
3009 /* both parities are correct */
3010 if (!s->failed)
3011 set_bit(STRIPE_INSYNC, &sh->state);
3012 else {
3013 /* in contrast to the raid5 case we can validate
3014 * parity, but still have a failure to write
3015 * back
3016 */
3017 sh->check_state = check_state_compute_result;
3018 /* Returning at this point means that we may go
3019 * off and bring p and/or q uptodate again so
3020 * we make sure to check zero_sum_result again
3021 * to verify if p or q need writeback
3022 */
3023 }
3024 } else {
3025 conf->mddev->resync_mismatches += STRIPE_SECTORS;
3026 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
3027 /* don't try to repair!! */
3028 set_bit(STRIPE_INSYNC, &sh->state);
3029 else {
3030 int *target = &sh->ops.target;
3031
3032 sh->ops.target = -1;
3033 sh->ops.target2 = -1;
3034 sh->check_state = check_state_compute_run;
3035 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
3036 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
3037 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
3038 set_bit(R5_Wantcompute,
3039 &sh->dev[pd_idx].flags);
3040 *target = pd_idx;
3041 target = &sh->ops.target2;
3042 s->uptodate++;
3043 }
3044 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
3045 set_bit(R5_Wantcompute,
3046 &sh->dev[qd_idx].flags);
3047 *target = qd_idx;
3048 s->uptodate++;
3049 }
3050 }
3051 }
3052 break;
3053 case check_state_compute_run:
3054 break;
3055 default:
3056 printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
3057 __func__, sh->check_state,
3058 (unsigned long long) sh->sector);
3059 BUG();
3060 }
2989} 3061}
2990 3062
2991static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, 3063static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,