aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2007-01-02 15:52:30 -0500
committerDan Williams <dan.j.williams@intel.com>2007-07-13 11:06:16 -0400
commitd84e0f10d38393f617227f0c831a99c69294651f (patch)
tree5d0836f024f1f13ac6f1b2b2d3004244cb0fa649 /drivers
parent91c00924846a0034020451c280c76baa4299f9dc (diff)
md: common infrastructure for running operations with raid5_run_ops
All the handle_stripe operations that are to be transitioned to use raid5_run_ops need a method to coherently gather work under the stripe-lock and hand that work off to raid5_run_ops. The 'get_stripe_work' routine runs under the lock to read all the bits in sh->ops.pending that do not have the corresponding bit set in sh->ops.ack. This modified 'pending' bitmap is then passed to raid5_run_ops for processing. The transition from 'ack' to 'completion' does not need similar protection as the existing release_stripe infrastructure will guarantee that handle_stripe will run again after a completion bit is set, and handle_stripe can tolerate a sh->ops.complete bit being set while the lock is held. A call to async_tx_issue_pending_all() is added to raid5d to kick the offload engines once all pending stripe-operation work has been submitted. This enables batching of the submission and completion of operations. Signed-off-by: Dan Williams <dan.j.williams@intel.com> Acked-By: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/raid5.c67
1 files changed, 58 insertions, 9 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0b7002479655..d89a25e7c17b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -141,6 +141,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
141 } 141 }
142 md_wakeup_thread(conf->mddev->thread); 142 md_wakeup_thread(conf->mddev->thread);
143 } else { 143 } else {
144 BUG_ON(sh->ops.pending);
144 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 145 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
145 atomic_dec(&conf->preread_active_stripes); 146 atomic_dec(&conf->preread_active_stripes);
146 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) 147 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
@@ -242,7 +243,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
242 243
243 BUG_ON(atomic_read(&sh->count) != 0); 244 BUG_ON(atomic_read(&sh->count) != 0);
244 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); 245 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
245 246 BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete);
247
246 CHECK_DEVLOCK(); 248 CHECK_DEVLOCK();
247 pr_debug("init_stripe called, stripe %llu\n", 249 pr_debug("init_stripe called, stripe %llu\n",
248 (unsigned long long)sh->sector); 250 (unsigned long long)sh->sector);
@@ -258,11 +260,11 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
258 for (i = sh->disks; i--; ) { 260 for (i = sh->disks; i--; ) {
259 struct r5dev *dev = &sh->dev[i]; 261 struct r5dev *dev = &sh->dev[i];
260 262
261 if (dev->toread || dev->towrite || dev->written || 263 if (dev->toread || dev->read || dev->towrite || dev->written ||
262 test_bit(R5_LOCKED, &dev->flags)) { 264 test_bit(R5_LOCKED, &dev->flags)) {
263 printk("sector=%llx i=%d %p %p %p %d\n", 265 printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n",
264 (unsigned long long)sh->sector, i, dev->toread, 266 (unsigned long long)sh->sector, i, dev->toread,
265 dev->towrite, dev->written, 267 dev->read, dev->towrite, dev->written,
266 test_bit(R5_LOCKED, &dev->flags)); 268 test_bit(R5_LOCKED, &dev->flags));
267 BUG(); 269 BUG();
268 } 270 }
@@ -342,6 +344,44 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
342 return sh; 344 return sh;
343} 345}
344 346
347/* test_and_ack_op() ensures that we only dequeue an operation once: 'op' stays set in 'pend' only when it is set in sh->ops.pending, clear in sh->ops.complete, and its sh->ops.ack bit was newly set by this invocation (which also bumps 'ack'); in every other case 'op' is cleared from 'pend'. Note: the macro uses 'sh' and 'ack' from the scope in which it is expanded. */
348#define test_and_ack_op(op, pend) \
349do { \
350 if (test_bit(op, &sh->ops.pending) && \
351 !test_bit(op, &sh->ops.complete)) { \
352 if (test_and_set_bit(op, &sh->ops.ack)) \
353 clear_bit(op, &pend); \
354 else \
355 ack++; \
356 } else \
357 clear_bit(op, &pend); \
358} while (0)
359
360/* find new work to run, do not resubmit work that is already
361 * in flight; returns a copy of sh->ops.pending from which every operation that was not newly acked here has been cleared, and subtracts the number of newly acked operations from sh->ops.count (the visible caller invokes this before dropping sh->lock)
362 */
363static unsigned long get_stripe_work(struct stripe_head *sh)
364{
365 unsigned long pending;
366 int ack = 0; /* operations newly acked by this call; updated by test_and_ack_op() */
367
368 pending = sh->ops.pending; /* local snapshot; test_and_ack_op() prunes bits from it below */
369
370 test_and_ack_op(STRIPE_OP_BIOFILL, pending);
371 test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending);
372 test_and_ack_op(STRIPE_OP_PREXOR, pending);
373 test_and_ack_op(STRIPE_OP_BIODRAIN, pending);
374 test_and_ack_op(STRIPE_OP_POSTXOR, pending);
375 test_and_ack_op(STRIPE_OP_CHECK, pending);
376 if (test_and_clear_bit(STRIPE_OP_IO, &sh->ops.pending)) /* IO is cleared from sh->ops.pending itself rather than acked; the local snapshot is left untouched, so the bit is still returned */
377 ack++;
378
379 sh->ops.count -= ack; /* each operation handed off here is removed from the count */
380 BUG_ON(sh->ops.count < 0);
381
382 return pending;
383}
384
345static int 385static int
346raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error); 386raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error);
347static int 387static int
@@ -2494,7 +2534,6 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2494 * schedule a write of some buffers 2534 * schedule a write of some buffers
2495 * return confirmation of parity correctness 2535 * return confirmation of parity correctness
2496 * 2536 *
2497 * Parity calculations are done inside the stripe lock
2498 * buffers are taken off read_list or write_list, and bh_cache buffers 2537 * buffers are taken off read_list or write_list, and bh_cache buffers
2499 * get BH_Lock set before the stripe lock is released. 2538 * get BH_Lock set before the stripe lock is released.
2500 * 2539 *
@@ -2507,11 +2546,13 @@ static void handle_stripe5(struct stripe_head *sh)
2507 struct bio *return_bi = NULL; 2546 struct bio *return_bi = NULL;
2508 struct stripe_head_state s; 2547 struct stripe_head_state s;
2509 struct r5dev *dev; 2548 struct r5dev *dev;
2549 unsigned long pending = 0;
2510 2550
2511 memset(&s, 0, sizeof(s)); 2551 memset(&s, 0, sizeof(s));
2512 pr_debug("handling stripe %llu, cnt=%d, pd_idx=%d\n", 2552 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
2513 (unsigned long long)sh->sector, atomic_read(&sh->count), 2553 "ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state,
2514 sh->pd_idx); 2554 atomic_read(&sh->count), sh->pd_idx,
2555 sh->ops.pending, sh->ops.ack, sh->ops.complete);
2515 2556
2516 spin_lock(&sh->lock); 2557 spin_lock(&sh->lock);
2517 clear_bit(STRIPE_HANDLE, &sh->state); 2558 clear_bit(STRIPE_HANDLE, &sh->state);
@@ -2674,8 +2715,14 @@ static void handle_stripe5(struct stripe_head *sh)
2674 if (s.expanding && s.locked == 0) 2715 if (s.expanding && s.locked == 0)
2675 handle_stripe_expansion(conf, sh, NULL); 2716 handle_stripe_expansion(conf, sh, NULL);
2676 2717
2718 if (sh->ops.count)
2719 pending = get_stripe_work(sh);
2720
2677 spin_unlock(&sh->lock); 2721 spin_unlock(&sh->lock);
2678 2722
2723 if (pending)
2724 raid5_run_ops(sh, pending);
2725
2679 return_io(return_bi); 2726 return_io(return_bi);
2680 2727
2681 for (i=disks; i-- ;) { 2728 for (i=disks; i-- ;) {
@@ -3798,8 +3845,10 @@ static void raid5d (mddev_t *mddev)
3798 handled++; 3845 handled++;
3799 } 3846 }
3800 3847
3801 if (list_empty(&conf->handle_list)) 3848 if (list_empty(&conf->handle_list)) {
3849 async_tx_issue_pending_all();
3802 break; 3850 break;
3851 }
3803 3852
3804 first = conf->handle_list.next; 3853 first = conf->handle_list.next;
3805 sh = list_entry(first, struct stripe_head, lru); 3854 sh = list_entry(first, struct stripe_head, lru);