aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dan Williams <dan.j.williams@intel.com> 2009-10-16 01:25:22 -0400
committer NeilBrown <neilb@suse.de> 2009-10-16 01:25:22 -0400
commit 417b8d4ac868cf58d6c68f52d72f7648413e0edc (patch)
tree 6eefb8b381fdad5235d2b29fc0054bcf537efbcc
parent dce3a7a42d585b74ce68081010b42afe81c8f4c4 (diff)
md/raid456: downlevel multicore operations to raid_run_ops
The percpu conversion allowed a straightforward handoff of stripe processing to the async subsystem that initially showed some modest gains (+4%). However, this model is too simplistic and leads to stripes bouncing between raid5d and the async thread pool for every invocation of handle_stripe(). As reported by Holger this can fall into a pathological situation severely impacting throughput (6x performance loss).

By downleveling the parallelism to raid_run_ops the pathological stripe_head bouncing is eliminated. This version still exhibits an average 11% throughput loss for:

    mdadm --create /dev/md0 /dev/sd[b-q] -n 16 -l 6
    echo 1024 > /sys/block/md0/md/stripe_cache_size
    dd if=/dev/zero of=/dev/md0 bs=1024k count=2048

...but the results are at least stable and can be used as a base for further multicore experimentation.

Reported-by: Holger Kiehl <Holger.Kiehl@dwd.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/raid5.c 75
-rw-r--r-- drivers/md/raid5.h 12
2 files changed, 51 insertions, 36 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c3e596778618..25c3c29134d1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1139,7 +1139,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu
1139 &sh->ops.zero_sum_result, percpu->spare_page, &submit); 1139 &sh->ops.zero_sum_result, percpu->spare_page, &submit);
1140} 1140}
1141 1141
1142static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) 1142static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
1143{ 1143{
1144 int overlap_clear = 0, i, disks = sh->disks; 1144 int overlap_clear = 0, i, disks = sh->disks;
1145 struct dma_async_tx_descriptor *tx = NULL; 1145 struct dma_async_tx_descriptor *tx = NULL;
@@ -1204,6 +1204,36 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
1204 put_cpu(); 1204 put_cpu();
1205} 1205}
1206 1206
1207#ifdef CONFIG_MULTICORE_RAID456
1208static void async_run_ops(void *param, async_cookie_t cookie)
1209{
1210 struct stripe_head *sh = param;
1211 unsigned long ops_request = sh->ops.request;
1212
1213 clear_bit_unlock(STRIPE_OPS_REQ_PENDING, &sh->state);
1214 wake_up(&sh->ops.wait_for_ops);
1215
1216 __raid_run_ops(sh, ops_request);
1217 release_stripe(sh);
1218}
1219
1220static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
1221{
1222 /* since handle_stripe can be called outside of raid5d context
1223 * we need to ensure sh->ops.request is de-staged before another
1224 * request arrives
1225 */
1226 wait_event(sh->ops.wait_for_ops,
1227 !test_and_set_bit_lock(STRIPE_OPS_REQ_PENDING, &sh->state));
1228 sh->ops.request = ops_request;
1229
1230 atomic_inc(&sh->count);
1231 async_schedule(async_run_ops, sh);
1232}
1233#else
1234#define raid_run_ops __raid_run_ops
1235#endif
1236
1207static int grow_one_stripe(raid5_conf_t *conf) 1237static int grow_one_stripe(raid5_conf_t *conf)
1208{ 1238{
1209 struct stripe_head *sh; 1239 struct stripe_head *sh;
@@ -1213,6 +1243,9 @@ static int grow_one_stripe(raid5_conf_t *conf)
1213 memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev)); 1243 memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
1214 sh->raid_conf = conf; 1244 sh->raid_conf = conf;
1215 spin_lock_init(&sh->lock); 1245 spin_lock_init(&sh->lock);
1246 #ifdef CONFIG_MULTICORE_RAID456
1247 init_waitqueue_head(&sh->ops.wait_for_ops);
1248 #endif
1216 1249
1217 if (grow_buffers(sh, conf->raid_disks)) { 1250 if (grow_buffers(sh, conf->raid_disks)) {
1218 shrink_buffers(sh, conf->raid_disks); 1251 shrink_buffers(sh, conf->raid_disks);
@@ -1329,6 +1362,9 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1329 1362
1330 nsh->raid_conf = conf; 1363 nsh->raid_conf = conf;
1331 spin_lock_init(&nsh->lock); 1364 spin_lock_init(&nsh->lock);
1365 #ifdef CONFIG_MULTICORE_RAID456
1366 init_waitqueue_head(&nsh->ops.wait_for_ops);
1367 #endif
1332 1368
1333 list_add(&nsh->lru, &newstripes); 1369 list_add(&nsh->lru, &newstripes);
1334 } 1370 }
@@ -4342,37 +4378,6 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
4342 return handled; 4378 return handled;
4343} 4379}
4344 4380
4345#ifdef CONFIG_MULTICORE_RAID456
4346static void __process_stripe(void *param, async_cookie_t cookie)
4347{
4348 struct stripe_head *sh = param;
4349
4350 handle_stripe(sh);
4351 release_stripe(sh);
4352}
4353
4354static void process_stripe(struct stripe_head *sh, struct list_head *domain)
4355{
4356 async_schedule_domain(__process_stripe, sh, domain);
4357}
4358
4359static void synchronize_stripe_processing(struct list_head *domain)
4360{
4361 async_synchronize_full_domain(domain);
4362}
4363#else
4364static void process_stripe(struct stripe_head *sh, struct list_head *domain)
4365{
4366 handle_stripe(sh);
4367 release_stripe(sh);
4368 cond_resched();
4369}
4370
4371static void synchronize_stripe_processing(struct list_head *domain)
4372{
4373}
4374#endif
4375
4376 4381
4377/* 4382/*
4378 * This is our raid5 kernel thread. 4383 * This is our raid5 kernel thread.
@@ -4386,7 +4391,6 @@ static void raid5d(mddev_t *mddev)
4386 struct stripe_head *sh; 4391 struct stripe_head *sh;
4387 raid5_conf_t *conf = mddev->private; 4392 raid5_conf_t *conf = mddev->private;
4388 int handled; 4393 int handled;
4389 LIST_HEAD(raid_domain);
4390 4394
4391 pr_debug("+++ raid5d active\n"); 4395 pr_debug("+++ raid5d active\n");
4392 4396
@@ -4423,7 +4427,9 @@ static void raid5d(mddev_t *mddev)
4423 spin_unlock_irq(&conf->device_lock); 4427 spin_unlock_irq(&conf->device_lock);
4424 4428
4425 handled++; 4429 handled++;
4426 process_stripe(sh, &raid_domain); 4430 handle_stripe(sh);
4431 release_stripe(sh);
4432 cond_resched();
4427 4433
4428 spin_lock_irq(&conf->device_lock); 4434 spin_lock_irq(&conf->device_lock);
4429 } 4435 }
@@ -4431,7 +4437,6 @@ static void raid5d(mddev_t *mddev)
4431 4437
4432 spin_unlock_irq(&conf->device_lock); 4438 spin_unlock_irq(&conf->device_lock);
4433 4439
4434 synchronize_stripe_processing(&raid_domain);
4435 async_tx_issue_pending_all(); 4440 async_tx_issue_pending_all();
4436 unplug_slaves(mddev); 4441 unplug_slaves(mddev);
4437 4442
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 2390e0e83daf..dcefdc9629ee 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -214,12 +214,20 @@ struct stripe_head {
214 int disks; /* disks in stripe */ 214 int disks; /* disks in stripe */
215 enum check_states check_state; 215 enum check_states check_state;
216 enum reconstruct_states reconstruct_state; 216 enum reconstruct_states reconstruct_state;
217 /* stripe_operations 217 /**
218 * struct stripe_operations
218 * @target - STRIPE_OP_COMPUTE_BLK target 219 * @target - STRIPE_OP_COMPUTE_BLK target
220 * @target2 - 2nd compute target in the raid6 case
221 * @zero_sum_result - P and Q verification flags
222 * @request - async service request flags for raid_run_ops
219 */ 223 */
220 struct stripe_operations { 224 struct stripe_operations {
221 int target, target2; 225 int target, target2;
222 enum sum_check_flags zero_sum_result; 226 enum sum_check_flags zero_sum_result;
227 #ifdef CONFIG_MULTICORE_RAID456
228 unsigned long request;
229 wait_queue_head_t wait_for_ops;
230 #endif
223 } ops; 231 } ops;
224 struct r5dev { 232 struct r5dev {
225 struct bio req; 233 struct bio req;
@@ -294,6 +302,8 @@ struct r6_state {
294#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */ 302#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */
295#define STRIPE_BIOFILL_RUN 14 303#define STRIPE_BIOFILL_RUN 14
296#define STRIPE_COMPUTE_RUN 15 304#define STRIPE_COMPUTE_RUN 15
305#define STRIPE_OPS_REQ_PENDING 16
306
297/* 307/*
298 * Operation request flags 308 * Operation request flags
299 */ 309 */