diff options
author | Dan Williams <dan.j.williams@intel.com> | 2009-10-16 01:25:22 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2009-10-16 01:25:22 -0400 |
commit | 417b8d4ac868cf58d6c68f52d72f7648413e0edc (patch) | |
tree | 6eefb8b381fdad5235d2b29fc0054bcf537efbcc | |
parent | dce3a7a42d585b74ce68081010b42afe81c8f4c4 (diff) |
md/raid456: downlevel multicore operations to raid_run_ops
The percpu conversion allowed a straightforward handoff of stripe
processing to the async subsystem that initially showed some modest gains
(+4%). However, this model is too simplistic and leads to stripes
bouncing between raid5d and the async thread pool for every invocation
of handle_stripe(). As reported by Holger, this can fall into a
pathological situation severely impacting throughput (6x performance
loss).
By downleveling the parallelism to raid_run_ops the pathological
stripe_head bouncing is eliminated. This version still exhibits an
average 11% throughput loss for:
mdadm --create /dev/md0 /dev/sd[b-q] -n 16 -l 6
echo 1024 > /sys/block/md0/md/stripe_cache_size
dd if=/dev/zero of=/dev/md0 bs=1024k count=2048
...but the results are at least stable and can be used as a base for
further multicore experimentation.
Reported-by: Holger Kiehl <Holger.Kiehl@dwd.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid5.c | 75 | ||||
-rw-r--r-- | drivers/md/raid5.h | 12 |
2 files changed, 51 insertions, 36 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c3e596778618..25c3c29134d1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -1139,7 +1139,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu | |||
1139 | &sh->ops.zero_sum_result, percpu->spare_page, &submit); | 1139 | &sh->ops.zero_sum_result, percpu->spare_page, &submit); |
1140 | } | 1140 | } |
1141 | 1141 | ||
1142 | static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | 1142 | static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request) |
1143 | { | 1143 | { |
1144 | int overlap_clear = 0, i, disks = sh->disks; | 1144 | int overlap_clear = 0, i, disks = sh->disks; |
1145 | struct dma_async_tx_descriptor *tx = NULL; | 1145 | struct dma_async_tx_descriptor *tx = NULL; |
@@ -1204,6 +1204,36 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |||
1204 | put_cpu(); | 1204 | put_cpu(); |
1205 | } | 1205 | } |
1206 | 1206 | ||
1207 | #ifdef CONFIG_MULTICORE_RAID456 | ||
1208 | static void async_run_ops(void *param, async_cookie_t cookie) | ||
1209 | { | ||
1210 | struct stripe_head *sh = param; | ||
1211 | unsigned long ops_request = sh->ops.request; | ||
1212 | |||
1213 | clear_bit_unlock(STRIPE_OPS_REQ_PENDING, &sh->state); | ||
1214 | wake_up(&sh->ops.wait_for_ops); | ||
1215 | |||
1216 | __raid_run_ops(sh, ops_request); | ||
1217 | release_stripe(sh); | ||
1218 | } | ||
1219 | |||
1220 | static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | ||
1221 | { | ||
1222 | /* since handle_stripe can be called outside of raid5d context | ||
1223 | * we need to ensure sh->ops.request is de-staged before another | ||
1224 | * request arrives | ||
1225 | */ | ||
1226 | wait_event(sh->ops.wait_for_ops, | ||
1227 | !test_and_set_bit_lock(STRIPE_OPS_REQ_PENDING, &sh->state)); | ||
1228 | sh->ops.request = ops_request; | ||
1229 | |||
1230 | atomic_inc(&sh->count); | ||
1231 | async_schedule(async_run_ops, sh); | ||
1232 | } | ||
1233 | #else | ||
1234 | #define raid_run_ops __raid_run_ops | ||
1235 | #endif | ||
1236 | |||
1207 | static int grow_one_stripe(raid5_conf_t *conf) | 1237 | static int grow_one_stripe(raid5_conf_t *conf) |
1208 | { | 1238 | { |
1209 | struct stripe_head *sh; | 1239 | struct stripe_head *sh; |
@@ -1213,6 +1243,9 @@ static int grow_one_stripe(raid5_conf_t *conf) | |||
1213 | memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev)); | 1243 | memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev)); |
1214 | sh->raid_conf = conf; | 1244 | sh->raid_conf = conf; |
1215 | spin_lock_init(&sh->lock); | 1245 | spin_lock_init(&sh->lock); |
1246 | #ifdef CONFIG_MULTICORE_RAID456 | ||
1247 | init_waitqueue_head(&sh->ops.wait_for_ops); | ||
1248 | #endif | ||
1216 | 1249 | ||
1217 | if (grow_buffers(sh, conf->raid_disks)) { | 1250 | if (grow_buffers(sh, conf->raid_disks)) { |
1218 | shrink_buffers(sh, conf->raid_disks); | 1251 | shrink_buffers(sh, conf->raid_disks); |
@@ -1329,6 +1362,9 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
1329 | 1362 | ||
1330 | nsh->raid_conf = conf; | 1363 | nsh->raid_conf = conf; |
1331 | spin_lock_init(&nsh->lock); | 1364 | spin_lock_init(&nsh->lock); |
1365 | #ifdef CONFIG_MULTICORE_RAID456 | ||
1366 | init_waitqueue_head(&nsh->ops.wait_for_ops); | ||
1367 | #endif | ||
1332 | 1368 | ||
1333 | list_add(&nsh->lru, &newstripes); | 1369 | list_add(&nsh->lru, &newstripes); |
1334 | } | 1370 | } |
@@ -4342,37 +4378,6 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
4342 | return handled; | 4378 | return handled; |
4343 | } | 4379 | } |
4344 | 4380 | ||
4345 | #ifdef CONFIG_MULTICORE_RAID456 | ||
4346 | static void __process_stripe(void *param, async_cookie_t cookie) | ||
4347 | { | ||
4348 | struct stripe_head *sh = param; | ||
4349 | |||
4350 | handle_stripe(sh); | ||
4351 | release_stripe(sh); | ||
4352 | } | ||
4353 | |||
4354 | static void process_stripe(struct stripe_head *sh, struct list_head *domain) | ||
4355 | { | ||
4356 | async_schedule_domain(__process_stripe, sh, domain); | ||
4357 | } | ||
4358 | |||
4359 | static void synchronize_stripe_processing(struct list_head *domain) | ||
4360 | { | ||
4361 | async_synchronize_full_domain(domain); | ||
4362 | } | ||
4363 | #else | ||
4364 | static void process_stripe(struct stripe_head *sh, struct list_head *domain) | ||
4365 | { | ||
4366 | handle_stripe(sh); | ||
4367 | release_stripe(sh); | ||
4368 | cond_resched(); | ||
4369 | } | ||
4370 | |||
4371 | static void synchronize_stripe_processing(struct list_head *domain) | ||
4372 | { | ||
4373 | } | ||
4374 | #endif | ||
4375 | |||
4376 | 4381 | ||
4377 | /* | 4382 | /* |
4378 | * This is our raid5 kernel thread. | 4383 | * This is our raid5 kernel thread. |
@@ -4386,7 +4391,6 @@ static void raid5d(mddev_t *mddev) | |||
4386 | struct stripe_head *sh; | 4391 | struct stripe_head *sh; |
4387 | raid5_conf_t *conf = mddev->private; | 4392 | raid5_conf_t *conf = mddev->private; |
4388 | int handled; | 4393 | int handled; |
4389 | LIST_HEAD(raid_domain); | ||
4390 | 4394 | ||
4391 | pr_debug("+++ raid5d active\n"); | 4395 | pr_debug("+++ raid5d active\n"); |
4392 | 4396 | ||
@@ -4423,7 +4427,9 @@ static void raid5d(mddev_t *mddev) | |||
4423 | spin_unlock_irq(&conf->device_lock); | 4427 | spin_unlock_irq(&conf->device_lock); |
4424 | 4428 | ||
4425 | handled++; | 4429 | handled++; |
4426 | process_stripe(sh, &raid_domain); | 4430 | handle_stripe(sh); |
4431 | release_stripe(sh); | ||
4432 | cond_resched(); | ||
4427 | 4433 | ||
4428 | spin_lock_irq(&conf->device_lock); | 4434 | spin_lock_irq(&conf->device_lock); |
4429 | } | 4435 | } |
@@ -4431,7 +4437,6 @@ static void raid5d(mddev_t *mddev) | |||
4431 | 4437 | ||
4432 | spin_unlock_irq(&conf->device_lock); | 4438 | spin_unlock_irq(&conf->device_lock); |
4433 | 4439 | ||
4434 | synchronize_stripe_processing(&raid_domain); | ||
4435 | async_tx_issue_pending_all(); | 4440 | async_tx_issue_pending_all(); |
4436 | unplug_slaves(mddev); | 4441 | unplug_slaves(mddev); |
4437 | 4442 | ||
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 2390e0e83daf..dcefdc9629ee 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -214,12 +214,20 @@ struct stripe_head { | |||
214 | int disks; /* disks in stripe */ | 214 | int disks; /* disks in stripe */ |
215 | enum check_states check_state; | 215 | enum check_states check_state; |
216 | enum reconstruct_states reconstruct_state; | 216 | enum reconstruct_states reconstruct_state; |
217 | /* stripe_operations | 217 | /** |
218 | * struct stripe_operations | ||
218 | * @target - STRIPE_OP_COMPUTE_BLK target | 219 | * @target - STRIPE_OP_COMPUTE_BLK target |
220 | * @target2 - 2nd compute target in the raid6 case | ||
221 | * @zero_sum_result - P and Q verification flags | ||
222 | * @request - async service request flags for raid_run_ops | ||
219 | */ | 223 | */ |
220 | struct stripe_operations { | 224 | struct stripe_operations { |
221 | int target, target2; | 225 | int target, target2; |
222 | enum sum_check_flags zero_sum_result; | 226 | enum sum_check_flags zero_sum_result; |
227 | #ifdef CONFIG_MULTICORE_RAID456 | ||
228 | unsigned long request; | ||
229 | wait_queue_head_t wait_for_ops; | ||
230 | #endif | ||
223 | } ops; | 231 | } ops; |
224 | struct r5dev { | 232 | struct r5dev { |
225 | struct bio req; | 233 | struct bio req; |
@@ -294,6 +302,8 @@ struct r6_state { | |||
294 | #define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */ | 302 | #define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */ |
295 | #define STRIPE_BIOFILL_RUN 14 | 303 | #define STRIPE_BIOFILL_RUN 14 |
296 | #define STRIPE_COMPUTE_RUN 15 | 304 | #define STRIPE_COMPUTE_RUN 15 |
305 | #define STRIPE_OPS_REQ_PENDING 16 | ||
306 | |||
297 | /* | 307 | /* |
298 | * Operation request flags | 308 | * Operation request flags |
299 | */ | 309 | */ |