about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/md/raid5.c 149
-rw-r--r-- include/linux/raid/raid5.h 2
2 files changed, 115 insertions, 36 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d9521aa69461..42439a4c1c51 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2077,36 +2077,101 @@ handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh,
2077 2077
2078} 2078}
2079 2079
2080/* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks
2081 * to process
2082 */
2083static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
2084 struct stripe_head_state *s, int disk_idx, int disks)
2085{
2086 struct r5dev *dev = &sh->dev[disk_idx];
2087 struct r5dev *failed_dev = &sh->dev[s->failed_num];
2088
2089 /* don't schedule compute operations or reads on the parity block while
2090 * a check is in flight
2091 */
2092 if ((disk_idx == sh->pd_idx) &&
2093 test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
2094 return ~0;
2095
2096 /* is the data in this block needed, and can we get it? */
2097 if (!test_bit(R5_LOCKED, &dev->flags) &&
2098 !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread ||
2099 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
2100 s->syncing || s->expanding || (s->failed &&
2101 (failed_dev->toread || (failed_dev->towrite &&
2102 !test_bit(R5_OVERWRITE, &failed_dev->flags)
2103 ))))) {
2104 /* 1/ We would like to get this block, possibly by computing it,
2105 * but we might not be able to.
2106 *
2107 * 2/ Since parity check operations potentially make the parity
2108 * block !uptodate it will need to be refreshed before any
2109 * compute operations on data disks are scheduled.
2110 *
2111 * 3/ We hold off parity block re-reads until check operations
2112 * have quiesced.
2113 */
2114 if ((s->uptodate == disks - 1) &&
2115 !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
2116 set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
2117 set_bit(R5_Wantcompute, &dev->flags);
2118 sh->ops.target = disk_idx;
2119 s->req_compute = 1;
2120 sh->ops.count++;
2121 /* Careful: from this point on 'uptodate' is in the eye
2122 * of raid5_run_ops which services 'compute' operations
2123 * before writes. R5_Wantcompute flags a block that will
2124 * be R5_UPTODATE by the time it is needed for a
2125 * subsequent operation.
2126 */
2127 s->uptodate++;
2128 return 0; /* uptodate + compute == disks */
2129 } else if ((s->uptodate < disks - 1) &&
2130 test_bit(R5_Insync, &dev->flags)) {
2131 /* Note: we hold off compute operations while checks are
2132 * in flight, but we still prefer 'compute' over 'read'
2133 * hence we only read if (uptodate < disks-1)
2134 */
2135 set_bit(R5_LOCKED, &dev->flags);
2136 set_bit(R5_Wantread, &dev->flags);
2137 if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
2138 sh->ops.count++;
2139 s->locked++;
2140 pr_debug("Reading block %d (sync=%d)\n", disk_idx,
2141 s->syncing);
2142 }
2143 }
2144
2145 return ~0;
2146}
2147
2080static void handle_issuing_new_read_requests5(struct stripe_head *sh, 2148static void handle_issuing_new_read_requests5(struct stripe_head *sh,
2081 struct stripe_head_state *s, int disks) 2149 struct stripe_head_state *s, int disks)
2082{ 2150{
2083 int i; 2151 int i;
2084 for (i = disks; i--; ) { 2152
2085 struct r5dev *dev = &sh->dev[i]; 2153 /* Clear completed compute operations. Parity recovery
2086 if (!test_bit(R5_LOCKED, &dev->flags) && 2154 * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled
2087 !test_bit(R5_UPTODATE, &dev->flags) && 2155 * later on in this routine
2088 (dev->toread || 2156 */
2089 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || 2157 if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
2090 s->syncing || s->expanding || 2158 !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2091 (s->failed && (sh->dev[s->failed_num].toread || 2159 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
2092 (sh->dev[s->failed_num].towrite && 2160 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
2093 !test_bit(R5_OVERWRITE, &sh->dev[s->failed_num].flags)) 2161 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
2094 )))) { 2162 }
2095 /* we would like to get this block, possibly 2163
2096 * by computing it, but we might not be able to 2164 /* look for blocks to read/compute, skip this if a compute
2097 */ 2165 * is already in flight, or if the stripe contents are in the
2098 if (s->uptodate == disks-1) { 2166 * midst of changing due to a write
2099 pr_debug("Computing block %d\n", i); 2167 */
2100 compute_block(sh, i); 2168 if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
2101 s->uptodate++; 2169 !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) &&
2102 } else if (test_bit(R5_Insync, &dev->flags)) { 2170 !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
2103 set_bit(R5_LOCKED, &dev->flags); 2171 for (i = disks; i--; )
2104 set_bit(R5_Wantread, &dev->flags); 2172 if (__handle_issuing_new_read_requests5(
2105 s->locked++; 2173 sh, s, i, disks) == 0)
2106 pr_debug("Reading block %d (sync=%d)\n", 2174 break;
2107 i, s->syncing);
2108 }
2109 }
2110 } 2175 }
2111 set_bit(STRIPE_HANDLE, &sh->state); 2176 set_bit(STRIPE_HANDLE, &sh->state);
2112} 2177}
@@ -2223,7 +2288,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
2223 struct r5dev *dev = &sh->dev[i]; 2288 struct r5dev *dev = &sh->dev[i];
2224 if ((dev->towrite || i == sh->pd_idx) && 2289 if ((dev->towrite || i == sh->pd_idx) &&
2225 !test_bit(R5_LOCKED, &dev->flags) && 2290 !test_bit(R5_LOCKED, &dev->flags) &&
2226 !test_bit(R5_UPTODATE, &dev->flags)) { 2291 !(test_bit(R5_UPTODATE, &dev->flags) ||
2292 test_bit(R5_Wantcompute, &dev->flags))) {
2227 if (test_bit(R5_Insync, &dev->flags)) 2293 if (test_bit(R5_Insync, &dev->flags))
2228 rmw++; 2294 rmw++;
2229 else 2295 else
@@ -2232,9 +2298,9 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
2232 /* Would I have to read this buffer for reconstruct_write */ 2298 /* Would I have to read this buffer for reconstruct_write */
2233 if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx && 2299 if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
2234 !test_bit(R5_LOCKED, &dev->flags) && 2300 !test_bit(R5_LOCKED, &dev->flags) &&
2235 !test_bit(R5_UPTODATE, &dev->flags)) { 2301 !(test_bit(R5_UPTODATE, &dev->flags) ||
2236 if (test_bit(R5_Insync, &dev->flags)) 2302 test_bit(R5_Wantcompute, &dev->flags))) {
2237 rcw++; 2303 if (test_bit(R5_Insync, &dev->flags)) rcw++;
2238 else 2304 else
2239 rcw += 2*disks; 2305 rcw += 2*disks;
2240 } 2306 }
@@ -2248,7 +2314,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
2248 struct r5dev *dev = &sh->dev[i]; 2314 struct r5dev *dev = &sh->dev[i];
2249 if ((dev->towrite || i == sh->pd_idx) && 2315 if ((dev->towrite || i == sh->pd_idx) &&
2250 !test_bit(R5_LOCKED, &dev->flags) && 2316 !test_bit(R5_LOCKED, &dev->flags) &&
2251 !test_bit(R5_UPTODATE, &dev->flags) && 2317 !(test_bit(R5_UPTODATE, &dev->flags) ||
2318 test_bit(R5_Wantcompute, &dev->flags)) &&
2252 test_bit(R5_Insync, &dev->flags)) { 2319 test_bit(R5_Insync, &dev->flags)) {
2253 if ( 2320 if (
2254 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 2321 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
@@ -2270,7 +2337,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
2270 if (!test_bit(R5_OVERWRITE, &dev->flags) && 2337 if (!test_bit(R5_OVERWRITE, &dev->flags) &&
2271 i != sh->pd_idx && 2338 i != sh->pd_idx &&
2272 !test_bit(R5_LOCKED, &dev->flags) && 2339 !test_bit(R5_LOCKED, &dev->flags) &&
2273 !test_bit(R5_UPTODATE, &dev->flags) && 2340 !(test_bit(R5_UPTODATE, &dev->flags) ||
2341 test_bit(R5_Wantcompute, &dev->flags)) &&
2274 test_bit(R5_Insync, &dev->flags)) { 2342 test_bit(R5_Insync, &dev->flags)) {
2275 if ( 2343 if (
2276 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 2344 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
@@ -2288,8 +2356,17 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
2288 /* now if nothing is locked, and if we have enough data, 2356 /* now if nothing is locked, and if we have enough data,
2289 * we can start a write request 2357 * we can start a write request
2290 */ 2358 */
2291 if (s->locked == 0 && (rcw == 0 || rmw == 0) && 2359 /* since handle_stripe can be called at any time we need to handle the
2292 !test_bit(STRIPE_BIT_DELAY, &sh->state)) 2360 * case where a compute block operation has been submitted and then a
2361 * subsequent call wants to start a write request. raid5_run_ops only
2362 * handles the case where compute block and postxor are requested
2363 * simultaneously. If this is not the case then new writes need to be
2364 * held off until the compute completes.
2365 */
2366 if ((s->req_compute ||
2367 !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
2368 (s->locked == 0 && (rcw == 0 || rmw == 0) &&
2369 !test_bit(STRIPE_BIT_DELAY, &sh->state)))
2293 s->locked += handle_write_operations5(sh, rcw == 0, 0); 2370 s->locked += handle_write_operations5(sh, rcw == 0, 0);
2294} 2371}
2295 2372
@@ -2650,6 +2727,7 @@ static void handle_stripe5(struct stripe_head *sh)
2650 /* now count some things */ 2727 /* now count some things */
2651 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; 2728 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
2652 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; 2729 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
2730 if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++;
2653 2731
2654 if (dev->toread) 2732 if (dev->toread)
2655 s.to_read++; 2733 s.to_read++;
@@ -2706,7 +2784,8 @@ static void handle_stripe5(struct stripe_head *sh)
2706 * or to load a block that is being partially written. 2784 * or to load a block that is being partially written.
2707 */ 2785 */
2708 if (s.to_read || s.non_overwrite || 2786 if (s.to_read || s.non_overwrite ||
2709 (s.syncing && (s.uptodate < disks)) || s.expanding) 2787 (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding ||
2788 test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
2710 handle_issuing_new_read_requests5(sh, &s, disks); 2789 handle_issuing_new_read_requests5(sh, &s, disks);
2711 2790
2712 /* Now we check to see if any write operations have recently 2791 /* Now we check to see if any write operations have recently
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 6fb9d94e6f2e..2293015de1d5 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -200,7 +200,7 @@ struct stripe_head {
200struct stripe_head_state { 200struct stripe_head_state {
201 int syncing, expanding, expanded; 201 int syncing, expanding, expanded;
202 int locked, uptodate, to_read, to_write, failed, written; 202 int locked, uptodate, to_read, to_write, failed, written;
203 int non_overwrite; 203 int compute, req_compute, non_overwrite;
204 int failed_num; 204 int failed_num;
205}; 205};
206 206