author	Nick Piggin <nickpiggin@yahoo.com.au>	2005-06-23 03:09:06 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-06-23 12:45:18 -0400
commit	35a82d1a53e1a9ad54efafcc940f9335beaed5c3 (patch)
tree	776d5b01970c5ce3e2c9fd4a2c4cf2168a0afa3c /drivers/block
parent	ab4af03a4054bd78bcabfb2214c9597201beae35 (diff)
[PATCH] optimise loop driver a bit
Looks like locking can be optimised quite a lot.  Increase lock widths
slightly so lo_lock is taken fewer times per request.  Also it was quite
trivial to cover lo_pending with that lock, and remove the atomic
requirement.  This also makes memory ordering explicitly correct, which is
nice (not that I particularly saw any mem ordering bugs).

Test was reading 4 250MB files in parallel on ext2-on-tmpfs filesystem (1K
block size, 4K page size).  System is 2 socket Xeon with HT (4 thread).

intel:/home/npiggin# umount /dev/loop0 ; mount /dev/loop0 /mnt/loop ; /usr/bin/time ./mtloop.sh

Before:
0.24user 5.51system 0:02.84elapsed 202%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.52system 0:02.88elapsed 198%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.57system 0:02.89elapsed 198%CPU (0avgtext+0avgdata 0maxresident)k
0.22user 5.51system 0:02.90elapsed 197%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.44system 0:02.91elapsed 193%CPU (0avgtext+0avgdata 0maxresident)k

After:
0.07user 2.34system 0:01.68elapsed 143%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.37system 0:01.68elapsed 144%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.39system 0:01.68elapsed 145%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.36system 0:01.68elapsed 144%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.42system 0:01.68elapsed 147%CPU (0avgtext+0avgdata 0maxresident)k

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
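The core of the change shows up in loop_make_request(). The before/after below is condensed from the hunks that follow; the rw sanity checks and some error paths are trimmed, and the _old/_new name suffixes are added here purely so both versions can be shown side by side. Read it as a sketch of the locking pattern, not as the literal driver code.

/* Before: lo_lock is taken for the state check, dropped, then taken again
 * inside loop_add_bio(); lo_pending is an atomic_t maintained on the side. */
static int loop_make_request_old(request_queue_t *q, struct bio *old_bio)
{
	struct loop_device *lo = q->queuedata;

	spin_lock_irq(&lo->lo_lock);		/* 1st acquisition: state check */
	if (lo->lo_state != Lo_bound)
		goto inactive;
	atomic_inc(&lo->lo_pending);		/* counter kept as an atomic_t */
	spin_unlock_irq(&lo->lo_lock);
	/* ... READ/WRITE/READA sanity checks ... */
	loop_add_bio(lo, old_bio);		/* 2nd acquisition inside, then up() */
	return 0;

inactive:
	spin_unlock_irq(&lo->lo_lock);
	bio_io_error(old_bio, old_bio->bi_size);
	return 0;
}

/* After: one critical section covers the state check, the now-plain
 * lo_pending counter and the queue; the wakeup moves outside the lock. */
static int loop_make_request_new(request_queue_t *q, struct bio *old_bio)
{
	struct loop_device *lo = q->queuedata;

	spin_lock_irq(&lo->lo_lock);		/* single acquisition per request */
	if (lo->lo_state != Lo_bound)
		goto out;
	lo->lo_pending++;			/* plain int, protected by lo_lock */
	loop_add_bio(lo, old_bio);		/* lockless helper, called under lo_lock */
	spin_unlock_irq(&lo->lo_lock);
	up(&lo->lo_bh_mutex);			/* wake loop_thread outside the lock */
	return 0;

out:
	if (lo->lo_pending == 0)		/* no reference taken: let teardown proceed */
		up(&lo->lo_bh_mutex);
	spin_unlock_irq(&lo->lo_lock);
	bio_io_error(old_bio, old_bio->bi_size);
	return 0;
}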
Diffstat (limited to 'drivers/block')
-rw-r--r--	drivers/block/loop.c	81
1 file changed, 38 insertions(+), 43 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 6f011d0d8e97..b35e08876dd4 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -472,17 +472,11 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
  */
 static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&lo->lo_lock, flags);
 	if (lo->lo_biotail) {
 		lo->lo_biotail->bi_next = bio;
 		lo->lo_biotail = bio;
 	} else
 		lo->lo_bio = lo->lo_biotail = bio;
-	spin_unlock_irqrestore(&lo->lo_lock, flags);
-
-	up(&lo->lo_bh_mutex);
 }
 
 /*
@@ -492,14 +486,12 @@ static struct bio *loop_get_bio(struct loop_device *lo)
 {
 	struct bio *bio;
 
-	spin_lock_irq(&lo->lo_lock);
 	if ((bio = lo->lo_bio)) {
 		if (bio == lo->lo_biotail)
 			lo->lo_biotail = NULL;
 		lo->lo_bio = bio->bi_next;
 		bio->bi_next = NULL;
 	}
-	spin_unlock_irq(&lo->lo_lock);
 
 	return bio;
 }
@@ -509,35 +501,28 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio)
 	struct loop_device *lo = q->queuedata;
 	int rw = bio_rw(old_bio);
 
-	if (!lo)
-		goto out;
+	if (rw == READA)
+		rw = READ;
+
+	BUG_ON(!lo || (rw != READ && rw != WRITE));
 
 	spin_lock_irq(&lo->lo_lock);
 	if (lo->lo_state != Lo_bound)
-		goto inactive;
-	atomic_inc(&lo->lo_pending);
-	spin_unlock_irq(&lo->lo_lock);
-
-	if (rw == WRITE) {
-		if (lo->lo_flags & LO_FLAGS_READ_ONLY)
-			goto err;
-	} else if (rw == READA) {
-		rw = READ;
-	} else if (rw != READ) {
-		printk(KERN_ERR "loop: unknown command (%x)\n", rw);
-		goto err;
-	}
+		goto out;
+	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
+		goto out;
+	lo->lo_pending++;
 	loop_add_bio(lo, old_bio);
+	spin_unlock_irq(&lo->lo_lock);
+	up(&lo->lo_bh_mutex);
 	return 0;
-err:
-	if (atomic_dec_and_test(&lo->lo_pending))
-		up(&lo->lo_bh_mutex);
+
 out:
+	if (lo->lo_pending == 0)
+		up(&lo->lo_bh_mutex);
+	spin_unlock_irq(&lo->lo_lock);
 	bio_io_error(old_bio, old_bio->bi_size);
 	return 0;
-inactive:
-	spin_unlock_irq(&lo->lo_lock);
-	goto out;
 }
 
 /*
@@ -560,13 +545,11 @@ static void do_loop_switch(struct loop_device *, struct switch_request *);
 
 static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
 {
-	int ret;
-
 	if (unlikely(!bio->bi_bdev)) {
 		do_loop_switch(lo, bio->bi_private);
 		bio_put(bio);
 	} else {
-		ret = do_bio_filebacked(lo, bio);
+		int ret = do_bio_filebacked(lo, bio);
 		bio_endio(bio, bio->bi_size, ret);
 	}
 }
@@ -594,7 +577,7 @@ static int loop_thread(void *data)
 	set_user_nice(current, -20);
 
 	lo->lo_state = Lo_bound;
-	atomic_inc(&lo->lo_pending);
+	lo->lo_pending = 1;
 
 	/*
 	 * up sem, we are running
@@ -602,26 +585,37 @@ static int loop_thread(void *data)
 	up(&lo->lo_sem);
 
 	for (;;) {
-		down_interruptible(&lo->lo_bh_mutex);
+		int pending;
+
 		/*
-		 * could be upped because of tear-down, not because of
-		 * pending work
+		 * interruptible just to not contribute to load avg
 		 */
-		if (!atomic_read(&lo->lo_pending))
+		if (down_interruptible(&lo->lo_bh_mutex))
+			continue;
+
+		spin_lock_irq(&lo->lo_lock);
+
+		/*
+		 * could be upped because of tear-down, not pending work
+		 */
+		if (unlikely(!lo->lo_pending)) {
+			spin_unlock_irq(&lo->lo_lock);
 			break;
+		}
 
 		bio = loop_get_bio(lo);
-		if (!bio) {
-			printk("loop: missing bio\n");
-			continue;
-		}
+		lo->lo_pending--;
+		pending = lo->lo_pending;
+		spin_unlock_irq(&lo->lo_lock);
+
+		BUG_ON(!bio);
 		loop_handle_bio(lo, bio);
 
 		/*
 		 * upped both for pending work and tear-down, lo_pending
 		 * will hit zero then
 		 */
-		if (atomic_dec_and_test(&lo->lo_pending))
+		if (unlikely(!pending))
 			break;
 	}
 
@@ -900,7 +894,8 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 
 	spin_lock_irq(&lo->lo_lock);
 	lo->lo_state = Lo_rundown;
-	if (atomic_dec_and_test(&lo->lo_pending))
+	lo->lo_pending--;
+	if (!lo->lo_pending)
 		up(&lo->lo_bh_mutex);
 	spin_unlock_irq(&lo->lo_lock);
 
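The consumer side mirrors the submission path: loop_thread now takes lo_lock once per iteration to cover the dequeue and the plain lo_pending counter, and decides whether to exit from a snapshot taken inside the critical section (loop_clr_fd drops the last reference under the same lock). The fragment below is condensed from the loop_thread hunk above; the enclosing for (;;) loop, the semaphore wait and the variable declarations are omitted, so treat it as an illustration of the handshake rather than standalone code.

		spin_lock_irq(&lo->lo_lock);
		if (unlikely(!lo->lo_pending)) {	/* upped by loop_clr_fd: tear-down */
			spin_unlock_irq(&lo->lo_lock);
			break;
		}
		bio = loop_get_bio(lo);			/* helper is lockless, caller holds lo_lock */
		lo->lo_pending--;
		pending = lo->lo_pending;		/* snapshot decides exit after the I/O */
		spin_unlock_irq(&lo->lo_lock);

		BUG_ON(!bio);
		loop_handle_bio(lo, bio);		/* file-backed I/O done without the lock held */
		if (unlikely(!pending))			/* last reference gone: rundown in progress */
			break;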