diff options
author | Dan Williams <dan.j.williams@intel.com> | 2007-01-02 15:52:30 -0500 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2007-07-13 11:06:16 -0400 |
commit | d84e0f10d38393f617227f0c831a99c69294651f (patch) | |
tree | 5d0836f024f1f13ac6f1b2b2d3004244cb0fa649 | |
parent | 91c00924846a0034020451c280c76baa4299f9dc (diff) |
md: common infrastructure for running operations with raid5_run_ops
All the handle_stripe operations that are to be transitioned to use
raid5_run_ops need a method to coherently gather work under the stripe-lock
and hand that work off to raid5_run_ops. The 'get_stripe_work' routine
runs under the lock to read all the bits in sh->ops.pending that do not
have the corresponding bit set in sh->ops.ack. This modified 'pending'
bitmap is then passed to raid5_run_ops for processing.
The transition from 'ack' to 'complete' does not need similar protection,
because the existing release_stripe infrastructure guarantees that
handle_stripe will run again after a completion bit is set, and
handle_stripe can tolerate a sh->ops.complete bit being set while the lock
is held.
A call to async_tx_issue_pending_all() is added to raid5d to kick the
offload engines once all pending stripe-operation work has been submitted.
This enables batching of the submission and completion of operations.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid5.c | 67 |
1 files changed, 58 insertions, 9 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0b7002479655..d89a25e7c17b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -141,6 +141,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
141 | } | 141 | } |
142 | md_wakeup_thread(conf->mddev->thread); | 142 | md_wakeup_thread(conf->mddev->thread); |
143 | } else { | 143 | } else { |
144 | BUG_ON(sh->ops.pending); | ||
144 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 145 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { |
145 | atomic_dec(&conf->preread_active_stripes); | 146 | atomic_dec(&conf->preread_active_stripes); |
146 | if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) | 147 | if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) |
@@ -242,7 +243,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int | |||
242 | 243 | ||
243 | BUG_ON(atomic_read(&sh->count) != 0); | 244 | BUG_ON(atomic_read(&sh->count) != 0); |
244 | BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); | 245 | BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); |
245 | 246 | BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete); | |
247 | |||
246 | CHECK_DEVLOCK(); | 248 | CHECK_DEVLOCK(); |
247 | pr_debug("init_stripe called, stripe %llu\n", | 249 | pr_debug("init_stripe called, stripe %llu\n", |
248 | (unsigned long long)sh->sector); | 250 | (unsigned long long)sh->sector); |
@@ -258,11 +260,11 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int | |||
258 | for (i = sh->disks; i--; ) { | 260 | for (i = sh->disks; i--; ) { |
259 | struct r5dev *dev = &sh->dev[i]; | 261 | struct r5dev *dev = &sh->dev[i]; |
260 | 262 | ||
261 | if (dev->toread || dev->towrite || dev->written || | 263 | if (dev->toread || dev->read || dev->towrite || dev->written || |
262 | test_bit(R5_LOCKED, &dev->flags)) { | 264 | test_bit(R5_LOCKED, &dev->flags)) { |
263 | printk("sector=%llx i=%d %p %p %p %d\n", | 265 | printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n", |
264 | (unsigned long long)sh->sector, i, dev->toread, | 266 | (unsigned long long)sh->sector, i, dev->toread, |
265 | dev->towrite, dev->written, | 267 | dev->read, dev->towrite, dev->written, |
266 | test_bit(R5_LOCKED, &dev->flags)); | 268 | test_bit(R5_LOCKED, &dev->flags)); |
267 | BUG(); | 269 | BUG(); |
268 | } | 270 | } |
@@ -342,6 +344,44 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector | |||
342 | return sh; | 344 | return sh; |
343 | } | 345 | } |
344 | 346 | ||
347 | /* test_and_ack_op() ensures that we only dequeue an operation once */ | ||
348 | #define test_and_ack_op(op, pend) \ | ||
349 | do { \ | ||
350 | if (test_bit(op, &sh->ops.pending) && \ | ||
351 | !test_bit(op, &sh->ops.complete)) { \ | ||
352 | if (test_and_set_bit(op, &sh->ops.ack)) \ | ||
353 | clear_bit(op, &pend); \ | ||
354 | else \ | ||
355 | ack++; \ | ||
356 | } else \ | ||
357 | clear_bit(op, &pend); \ | ||
358 | } while (0) | ||
359 | |||
360 | /* find new work to run, do not resubmit work that is already | ||
361 | * in flight | ||
362 | */ | ||
363 | static unsigned long get_stripe_work(struct stripe_head *sh) | ||
364 | { | ||
365 | unsigned long pending; | ||
366 | int ack = 0; | ||
367 | |||
368 | pending = sh->ops.pending; | ||
369 | |||
370 | test_and_ack_op(STRIPE_OP_BIOFILL, pending); | ||
371 | test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending); | ||
372 | test_and_ack_op(STRIPE_OP_PREXOR, pending); | ||
373 | test_and_ack_op(STRIPE_OP_BIODRAIN, pending); | ||
374 | test_and_ack_op(STRIPE_OP_POSTXOR, pending); | ||
375 | test_and_ack_op(STRIPE_OP_CHECK, pending); | ||
376 | if (test_and_clear_bit(STRIPE_OP_IO, &sh->ops.pending)) | ||
377 | ack++; | ||
378 | |||
379 | sh->ops.count -= ack; | ||
380 | BUG_ON(sh->ops.count < 0); | ||
381 | |||
382 | return pending; | ||
383 | } | ||
384 | |||
345 | static int | 385 | static int |
346 | raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error); | 386 | raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error); |
347 | static int | 387 | static int |
@@ -2494,7 +2534,6 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, | |||
2494 | * schedule a write of some buffers | 2534 | * schedule a write of some buffers |
2495 | * return confirmation of parity correctness | 2535 | * return confirmation of parity correctness |
2496 | * | 2536 | * |
2497 | * Parity calculations are done inside the stripe lock | ||
2498 | * buffers are taken off read_list or write_list, and bh_cache buffers | 2537 | * buffers are taken off read_list or write_list, and bh_cache buffers |
2499 | * get BH_Lock set before the stripe lock is released. | 2538 | * get BH_Lock set before the stripe lock is released. |
2500 | * | 2539 | * |
@@ -2507,11 +2546,13 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2507 | struct bio *return_bi = NULL; | 2546 | struct bio *return_bi = NULL; |
2508 | struct stripe_head_state s; | 2547 | struct stripe_head_state s; |
2509 | struct r5dev *dev; | 2548 | struct r5dev *dev; |
2549 | unsigned long pending = 0; | ||
2510 | 2550 | ||
2511 | memset(&s, 0, sizeof(s)); | 2551 | memset(&s, 0, sizeof(s)); |
2512 | pr_debug("handling stripe %llu, cnt=%d, pd_idx=%d\n", | 2552 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " |
2513 | (unsigned long long)sh->sector, atomic_read(&sh->count), | 2553 | "ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state, |
2514 | sh->pd_idx); | 2554 | atomic_read(&sh->count), sh->pd_idx, |
2555 | sh->ops.pending, sh->ops.ack, sh->ops.complete); | ||
2515 | 2556 | ||
2516 | spin_lock(&sh->lock); | 2557 | spin_lock(&sh->lock); |
2517 | clear_bit(STRIPE_HANDLE, &sh->state); | 2558 | clear_bit(STRIPE_HANDLE, &sh->state); |
@@ -2674,8 +2715,14 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2674 | if (s.expanding && s.locked == 0) | 2715 | if (s.expanding && s.locked == 0) |
2675 | handle_stripe_expansion(conf, sh, NULL); | 2716 | handle_stripe_expansion(conf, sh, NULL); |
2676 | 2717 | ||
2718 | if (sh->ops.count) | ||
2719 | pending = get_stripe_work(sh); | ||
2720 | |||
2677 | spin_unlock(&sh->lock); | 2721 | spin_unlock(&sh->lock); |
2678 | 2722 | ||
2723 | if (pending) | ||
2724 | raid5_run_ops(sh, pending); | ||
2725 | |||
2679 | return_io(return_bi); | 2726 | return_io(return_bi); |
2680 | 2727 | ||
2681 | for (i=disks; i-- ;) { | 2728 | for (i=disks; i-- ;) { |
@@ -3798,8 +3845,10 @@ static void raid5d (mddev_t *mddev) | |||
3798 | handled++; | 3845 | handled++; |
3799 | } | 3846 | } |
3800 | 3847 | ||
3801 | if (list_empty(&conf->handle_list)) | 3848 | if (list_empty(&conf->handle_list)) { |
3849 | async_tx_issue_pending_all(); | ||
3802 | break; | 3850 | break; |
3851 | } | ||
3803 | 3852 | ||
3804 | first = conf->handle_list.next; | 3853 | first = conf->handle_list.next; |
3805 | sh = list_entry(first, struct stripe_head, lru); | 3854 | sh = list_entry(first, struct stripe_head, lru); |