diff options
-rw-r--r-- | drivers/md/raid5.c | 149 | ||||
-rw-r--r-- | include/linux/raid/raid5.h | 2 |
2 files changed, 115 insertions, 36 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d9521aa69461..42439a4c1c51 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -2077,36 +2077,101 @@ handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh, | |||
2077 | 2077 | ||
2078 | } | 2078 | } |
2079 | 2079 | ||
2080 | /* __handle_issuing_new_read_requests5 - returns 0 once a compute operation | ||
2081 | * has been scheduled (no more disks to process), non-zero otherwise | ||
2082 | */ | ||
2083 | static int __handle_issuing_new_read_requests5(struct stripe_head *sh, | ||
2084 | struct stripe_head_state *s, int disk_idx, int disks) | ||
2085 | { | ||
2086 | struct r5dev *dev = &sh->dev[disk_idx]; | ||
2087 | struct r5dev *failed_dev = &sh->dev[s->failed_num]; | ||
2088 | |||
2089 | /* don't schedule compute operations or reads on the parity block while | ||
2090 | * a check is in flight | ||
2091 | */ | ||
2092 | if ((disk_idx == sh->pd_idx) && | ||
2093 | test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) | ||
2094 | return ~0; | ||
2095 | |||
2096 | /* is the data in this block needed, and can we get it? */ | ||
2097 | if (!test_bit(R5_LOCKED, &dev->flags) && | ||
2098 | !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread || | ||
2099 | (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || | ||
2100 | s->syncing || s->expanding || (s->failed && | ||
2101 | (failed_dev->toread || (failed_dev->towrite && | ||
2102 | !test_bit(R5_OVERWRITE, &failed_dev->flags) | ||
2103 | ))))) { | ||
2104 | /* 1/ We would like to get this block, possibly by computing it, | ||
2105 | * but we might not be able to. | ||
2106 | * | ||
2107 | * 2/ Since parity check operations potentially make the parity | ||
2108 | * block !uptodate it will need to be refreshed before any | ||
2109 | * compute operations on data disks are scheduled. | ||
2110 | * | ||
2111 | * 3/ We hold off parity block re-reads until check operations | ||
2112 | * have quiesced. | ||
2113 | */ | ||
2114 | if ((s->uptodate == disks - 1) && | ||
2115 | !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { | ||
2116 | set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | ||
2117 | set_bit(R5_Wantcompute, &dev->flags); | ||
2118 | sh->ops.target = disk_idx; | ||
2119 | s->req_compute = 1; | ||
2120 | sh->ops.count++; | ||
2121 | /* Careful: from this point on 'uptodate' is in the eye | ||
2122 | * of raid5_run_ops which services 'compute' operations | ||
2123 | * before writes. R5_Wantcompute flags a block that will | ||
2124 | * be R5_UPTODATE by the time it is needed for a | ||
2125 | * subsequent operation. | ||
2126 | */ | ||
2127 | s->uptodate++; | ||
2128 | return 0; /* uptodate + compute == disks */ | ||
2129 | } else if ((s->uptodate < disks - 1) && | ||
2130 | test_bit(R5_Insync, &dev->flags)) { | ||
2131 | /* Note: we hold off compute operations while checks are | ||
2132 | * in flight, but we still prefer 'compute' over 'read' | ||
2133 | * hence we only read if (uptodate < disks-1) | ||
2134 | */ | ||
2135 | set_bit(R5_LOCKED, &dev->flags); | ||
2136 | set_bit(R5_Wantread, &dev->flags); | ||
2137 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) | ||
2138 | sh->ops.count++; | ||
2139 | s->locked++; | ||
2140 | pr_debug("Reading block %d (sync=%d)\n", disk_idx, | ||
2141 | s->syncing); | ||
2142 | } | ||
2143 | } | ||
2144 | |||
2145 | return ~0; | ||
2146 | } | ||
2147 | |||
2080 | static void handle_issuing_new_read_requests5(struct stripe_head *sh, | 2148 | static void handle_issuing_new_read_requests5(struct stripe_head *sh, |
2081 | struct stripe_head_state *s, int disks) | 2149 | struct stripe_head_state *s, int disks) |
2082 | { | 2150 | { |
2083 | int i; | 2151 | int i; |
2084 | for (i = disks; i--; ) { | 2152 | |
2085 | struct r5dev *dev = &sh->dev[i]; | 2153 | /* Clear completed compute operations. Parity recovery |
2086 | if (!test_bit(R5_LOCKED, &dev->flags) && | 2154 | * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled |
2087 | !test_bit(R5_UPTODATE, &dev->flags) && | 2155 | * later on in this routine |
2088 | (dev->toread || | 2156 | */ |
2089 | (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || | 2157 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && |
2090 | s->syncing || s->expanding || | 2158 | !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { |
2091 | (s->failed && (sh->dev[s->failed_num].toread || | 2159 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); |
2092 | (sh->dev[s->failed_num].towrite && | 2160 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); |
2093 | !test_bit(R5_OVERWRITE, &sh->dev[s->failed_num].flags)) | 2161 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); |
2094 | )))) { | 2162 | } |
2095 | /* we would like to get this block, possibly | 2163 | |
2096 | * by computing it, but we might not be able to | 2164 | /* look for blocks to read/compute, skip this if a compute |
2097 | */ | 2165 | * is already in flight, or if the stripe contents are in the |
2098 | if (s->uptodate == disks-1) { | 2166 | * midst of changing due to a write |
2099 | pr_debug("Computing block %d\n", i); | 2167 | */ |
2100 | compute_block(sh, i); | 2168 | if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) && |
2101 | s->uptodate++; | 2169 | !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) && |
2102 | } else if (test_bit(R5_Insync, &dev->flags)) { | 2170 | !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { |
2103 | set_bit(R5_LOCKED, &dev->flags); | 2171 | for (i = disks; i--; ) |
2104 | set_bit(R5_Wantread, &dev->flags); | 2172 | if (__handle_issuing_new_read_requests5( |
2105 | s->locked++; | 2173 | sh, s, i, disks) == 0) |
2106 | pr_debug("Reading block %d (sync=%d)\n", | 2174 | break; |
2107 | i, s->syncing); | ||
2108 | } | ||
2109 | } | ||
2110 | } | 2175 | } |
2111 | set_bit(STRIPE_HANDLE, &sh->state); | 2176 | set_bit(STRIPE_HANDLE, &sh->state); |
2112 | } | 2177 | } |
@@ -2223,7 +2288,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, | |||
2223 | struct r5dev *dev = &sh->dev[i]; | 2288 | struct r5dev *dev = &sh->dev[i]; |
2224 | if ((dev->towrite || i == sh->pd_idx) && | 2289 | if ((dev->towrite || i == sh->pd_idx) && |
2225 | !test_bit(R5_LOCKED, &dev->flags) && | 2290 | !test_bit(R5_LOCKED, &dev->flags) && |
2226 | !test_bit(R5_UPTODATE, &dev->flags)) { | 2291 | !(test_bit(R5_UPTODATE, &dev->flags) || |
2292 | test_bit(R5_Wantcompute, &dev->flags))) { | ||
2227 | if (test_bit(R5_Insync, &dev->flags)) | 2293 | if (test_bit(R5_Insync, &dev->flags)) |
2228 | rmw++; | 2294 | rmw++; |
2229 | else | 2295 | else |
@@ -2232,9 +2298,9 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, | |||
2232 | /* Would I have to read this buffer for reconstruct_write */ | 2298 | /* Would I have to read this buffer for reconstruct_write */ |
2233 | if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx && | 2299 | if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx && |
2234 | !test_bit(R5_LOCKED, &dev->flags) && | 2300 | !test_bit(R5_LOCKED, &dev->flags) && |
2235 | !test_bit(R5_UPTODATE, &dev->flags)) { | 2301 | !(test_bit(R5_UPTODATE, &dev->flags) || |
2236 | if (test_bit(R5_Insync, &dev->flags)) | 2302 | test_bit(R5_Wantcompute, &dev->flags))) { |
2237 | rcw++; | 2303 | if (test_bit(R5_Insync, &dev->flags)) rcw++; |
2238 | else | 2304 | else |
2239 | rcw += 2*disks; | 2305 | rcw += 2*disks; |
2240 | } | 2306 | } |
@@ -2248,7 +2314,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, | |||
2248 | struct r5dev *dev = &sh->dev[i]; | 2314 | struct r5dev *dev = &sh->dev[i]; |
2249 | if ((dev->towrite || i == sh->pd_idx) && | 2315 | if ((dev->towrite || i == sh->pd_idx) && |
2250 | !test_bit(R5_LOCKED, &dev->flags) && | 2316 | !test_bit(R5_LOCKED, &dev->flags) && |
2251 | !test_bit(R5_UPTODATE, &dev->flags) && | 2317 | !(test_bit(R5_UPTODATE, &dev->flags) || |
2318 | test_bit(R5_Wantcompute, &dev->flags)) && | ||
2252 | test_bit(R5_Insync, &dev->flags)) { | 2319 | test_bit(R5_Insync, &dev->flags)) { |
2253 | if ( | 2320 | if ( |
2254 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 2321 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { |
@@ -2270,7 +2337,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, | |||
2270 | if (!test_bit(R5_OVERWRITE, &dev->flags) && | 2337 | if (!test_bit(R5_OVERWRITE, &dev->flags) && |
2271 | i != sh->pd_idx && | 2338 | i != sh->pd_idx && |
2272 | !test_bit(R5_LOCKED, &dev->flags) && | 2339 | !test_bit(R5_LOCKED, &dev->flags) && |
2273 | !test_bit(R5_UPTODATE, &dev->flags) && | 2340 | !(test_bit(R5_UPTODATE, &dev->flags) || |
2341 | test_bit(R5_Wantcompute, &dev->flags)) && | ||
2274 | test_bit(R5_Insync, &dev->flags)) { | 2342 | test_bit(R5_Insync, &dev->flags)) { |
2275 | if ( | 2343 | if ( |
2276 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 2344 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { |
@@ -2288,8 +2356,17 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, | |||
2288 | /* now if nothing is locked, and if we have enough data, | 2356 | /* now if nothing is locked, and if we have enough data, |
2289 | * we can start a write request | 2357 | * we can start a write request |
2290 | */ | 2358 | */ |
2291 | if (s->locked == 0 && (rcw == 0 || rmw == 0) && | 2359 | /* since handle_stripe can be called at any time we need to handle the |
2292 | !test_bit(STRIPE_BIT_DELAY, &sh->state)) | 2360 | * case where a compute block operation has been submitted and then a |
2361 | * subsequent call wants to start a write request. raid5_run_ops only | ||
2362 | * handles the case where compute block and postxor are requested | ||
2363 | * simultaneously. If this is not the case then new writes need to be | ||
2364 | * held off until the compute completes. | ||
2365 | */ | ||
2366 | if ((s->req_compute || | ||
2367 | !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) && | ||
2368 | (s->locked == 0 && (rcw == 0 || rmw == 0) && | ||
2369 | !test_bit(STRIPE_BIT_DELAY, &sh->state))) | ||
2293 | s->locked += handle_write_operations5(sh, rcw == 0, 0); | 2370 | s->locked += handle_write_operations5(sh, rcw == 0, 0); |
2294 | } | 2371 | } |
2295 | 2372 | ||
@@ -2650,6 +2727,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2650 | /* now count some things */ | 2727 | /* now count some things */ |
2651 | if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; | 2728 | if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; |
2652 | if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; | 2729 | if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; |
2730 | if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++; | ||
2653 | 2731 | ||
2654 | if (dev->toread) | 2732 | if (dev->toread) |
2655 | s.to_read++; | 2733 | s.to_read++; |
@@ -2706,7 +2784,8 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2706 | * or to load a block that is being partially written. | 2784 | * or to load a block that is being partially written. |
2707 | */ | 2785 | */ |
2708 | if (s.to_read || s.non_overwrite || | 2786 | if (s.to_read || s.non_overwrite || |
2709 | (s.syncing && (s.uptodate < disks)) || s.expanding) | 2787 | (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding || |
2788 | test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) | ||
2710 | handle_issuing_new_read_requests5(sh, &s, disks); | 2789 | handle_issuing_new_read_requests5(sh, &s, disks); |
2711 | 2790 | ||
2712 | /* Now we check to see if any write operations have recently | 2791 | /* Now we check to see if any write operations have recently |
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 6fb9d94e6f2e..2293015de1d5 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h | |||
@@ -200,7 +200,7 @@ struct stripe_head { | |||
200 | struct stripe_head_state { | 200 | struct stripe_head_state { |
201 | int syncing, expanding, expanded; | 201 | int syncing, expanding, expanded; |
202 | int locked, uptodate, to_read, to_write, failed, written; | 202 | int locked, uptodate, to_read, to_write, failed, written; |
203 | int non_overwrite; | 203 | int compute, req_compute, non_overwrite; |
204 | int failed_num; | 204 | int failed_num; |
205 | }; | 205 | }; |
206 | 206 | ||