diff options
author | Nick Piggin <nickpiggin@yahoo.com.au> | 2005-06-23 03:09:06 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-23 12:45:18 -0400 |
commit | 35a82d1a53e1a9ad54efafcc940f9335beaed5c3 (patch) | |
tree | 776d5b01970c5ce3e2c9fd4a2c4cf2168a0afa3c /drivers/block | |
parent | ab4af03a4054bd78bcabfb2214c9597201beae35 (diff) |
[PATCH] optimise loop driver a bit
Looks like locking can be optimised quite a lot. Widen the lo_lock critical
sections slightly so that lo_lock is taken fewer times per request. It was also
quite trivial to cover lo_pending with that lock, so it no longer needs to be
atomic. This also makes memory ordering explicitly correct, which is
nice (not that I particularly saw any memory ordering bugs).
Test was reading four 250MB files in parallel on an ext2-on-tmpfs filesystem (1K
block size, 4K page size). System is a 2-socket Xeon with HT (4 threads).
intel:/home/npiggin# umount /dev/loop0 ; mount /dev/loop0 /mnt/loop ; /usr/bin/time ./mtloop.sh
Before:
0.24user 5.51system 0:02.84elapsed 202%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.52system 0:02.88elapsed 198%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.57system 0:02.89elapsed 198%CPU (0avgtext+0avgdata 0maxresident)k
0.22user 5.51system 0:02.90elapsed 197%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.44system 0:02.91elapsed 193%CPU (0avgtext+0avgdata 0maxresident)k
After:
0.07user 2.34system 0:01.68elapsed 143%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.37system 0:01.68elapsed 144%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.39system 0:01.68elapsed 145%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.36system 0:01.68elapsed 144%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.42system 0:01.68elapsed 147%CPU (0avgtext+0avgdata 0maxresident)k
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/loop.c | 81 |
1 files changed, 38 insertions, 43 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 6f011d0d8e97..b35e08876dd4 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -472,17 +472,11 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) | |||
472 | */ | 472 | */ |
473 | static void loop_add_bio(struct loop_device *lo, struct bio *bio) | 473 | static void loop_add_bio(struct loop_device *lo, struct bio *bio) |
474 | { | 474 | { |
475 | unsigned long flags; | ||
476 | |||
477 | spin_lock_irqsave(&lo->lo_lock, flags); | ||
478 | if (lo->lo_biotail) { | 475 | if (lo->lo_biotail) { |
479 | lo->lo_biotail->bi_next = bio; | 476 | lo->lo_biotail->bi_next = bio; |
480 | lo->lo_biotail = bio; | 477 | lo->lo_biotail = bio; |
481 | } else | 478 | } else |
482 | lo->lo_bio = lo->lo_biotail = bio; | 479 | lo->lo_bio = lo->lo_biotail = bio; |
483 | spin_unlock_irqrestore(&lo->lo_lock, flags); | ||
484 | |||
485 | up(&lo->lo_bh_mutex); | ||
486 | } | 480 | } |
487 | 481 | ||
488 | /* | 482 | /* |
@@ -492,14 +486,12 @@ static struct bio *loop_get_bio(struct loop_device *lo) | |||
492 | { | 486 | { |
493 | struct bio *bio; | 487 | struct bio *bio; |
494 | 488 | ||
495 | spin_lock_irq(&lo->lo_lock); | ||
496 | if ((bio = lo->lo_bio)) { | 489 | if ((bio = lo->lo_bio)) { |
497 | if (bio == lo->lo_biotail) | 490 | if (bio == lo->lo_biotail) |
498 | lo->lo_biotail = NULL; | 491 | lo->lo_biotail = NULL; |
499 | lo->lo_bio = bio->bi_next; | 492 | lo->lo_bio = bio->bi_next; |
500 | bio->bi_next = NULL; | 493 | bio->bi_next = NULL; |
501 | } | 494 | } |
502 | spin_unlock_irq(&lo->lo_lock); | ||
503 | 495 | ||
504 | return bio; | 496 | return bio; |
505 | } | 497 | } |
@@ -509,35 +501,28 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio) | |||
509 | struct loop_device *lo = q->queuedata; | 501 | struct loop_device *lo = q->queuedata; |
510 | int rw = bio_rw(old_bio); | 502 | int rw = bio_rw(old_bio); |
511 | 503 | ||
512 | if (!lo) | 504 | if (rw == READA) |
513 | goto out; | 505 | rw = READ; |
506 | |||
507 | BUG_ON(!lo || (rw != READ && rw != WRITE)); | ||
514 | 508 | ||
515 | spin_lock_irq(&lo->lo_lock); | 509 | spin_lock_irq(&lo->lo_lock); |
516 | if (lo->lo_state != Lo_bound) | 510 | if (lo->lo_state != Lo_bound) |
517 | goto inactive; | 511 | goto out; |
518 | atomic_inc(&lo->lo_pending); | 512 | if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY))) |
519 | spin_unlock_irq(&lo->lo_lock); | 513 | goto out; |
520 | 514 | lo->lo_pending++; | |
521 | if (rw == WRITE) { | ||
522 | if (lo->lo_flags & LO_FLAGS_READ_ONLY) | ||
523 | goto err; | ||
524 | } else if (rw == READA) { | ||
525 | rw = READ; | ||
526 | } else if (rw != READ) { | ||
527 | printk(KERN_ERR "loop: unknown command (%x)\n", rw); | ||
528 | goto err; | ||
529 | } | ||
530 | loop_add_bio(lo, old_bio); | 515 | loop_add_bio(lo, old_bio); |
516 | spin_unlock_irq(&lo->lo_lock); | ||
517 | up(&lo->lo_bh_mutex); | ||
531 | return 0; | 518 | return 0; |
532 | err: | 519 | |
533 | if (atomic_dec_and_test(&lo->lo_pending)) | ||
534 | up(&lo->lo_bh_mutex); | ||
535 | out: | 520 | out: |
521 | if (lo->lo_pending == 0) | ||
522 | up(&lo->lo_bh_mutex); | ||
523 | spin_unlock_irq(&lo->lo_lock); | ||
536 | bio_io_error(old_bio, old_bio->bi_size); | 524 | bio_io_error(old_bio, old_bio->bi_size); |
537 | return 0; | 525 | return 0; |
538 | inactive: | ||
539 | spin_unlock_irq(&lo->lo_lock); | ||
540 | goto out; | ||
541 | } | 526 | } |
542 | 527 | ||
543 | /* | 528 | /* |
@@ -560,13 +545,11 @@ static void do_loop_switch(struct loop_device *, struct switch_request *); | |||
560 | 545 | ||
561 | static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) | 546 | static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) |
562 | { | 547 | { |
563 | int ret; | ||
564 | |||
565 | if (unlikely(!bio->bi_bdev)) { | 548 | if (unlikely(!bio->bi_bdev)) { |
566 | do_loop_switch(lo, bio->bi_private); | 549 | do_loop_switch(lo, bio->bi_private); |
567 | bio_put(bio); | 550 | bio_put(bio); |
568 | } else { | 551 | } else { |
569 | ret = do_bio_filebacked(lo, bio); | 552 | int ret = do_bio_filebacked(lo, bio); |
570 | bio_endio(bio, bio->bi_size, ret); | 553 | bio_endio(bio, bio->bi_size, ret); |
571 | } | 554 | } |
572 | } | 555 | } |
@@ -594,7 +577,7 @@ static int loop_thread(void *data) | |||
594 | set_user_nice(current, -20); | 577 | set_user_nice(current, -20); |
595 | 578 | ||
596 | lo->lo_state = Lo_bound; | 579 | lo->lo_state = Lo_bound; |
597 | atomic_inc(&lo->lo_pending); | 580 | lo->lo_pending = 1; |
598 | 581 | ||
599 | /* | 582 | /* |
600 | * up sem, we are running | 583 | * up sem, we are running |
@@ -602,26 +585,37 @@ static int loop_thread(void *data) | |||
602 | up(&lo->lo_sem); | 585 | up(&lo->lo_sem); |
603 | 586 | ||
604 | for (;;) { | 587 | for (;;) { |
605 | down_interruptible(&lo->lo_bh_mutex); | 588 | int pending; |
589 | |||
606 | /* | 590 | /* |
607 | * could be upped because of tear-down, not because of | 591 | * interruptible just to not contribute to load avg |
608 | * pending work | ||
609 | */ | 592 | */ |
610 | if (!atomic_read(&lo->lo_pending)) | 593 | if (down_interruptible(&lo->lo_bh_mutex)) |
594 | continue; | ||
595 | |||
596 | spin_lock_irq(&lo->lo_lock); | ||
597 | |||
598 | /* | ||
599 | * could be upped because of tear-down, not pending work | ||
600 | */ | ||
601 | if (unlikely(!lo->lo_pending)) { | ||
602 | spin_unlock_irq(&lo->lo_lock); | ||
611 | break; | 603 | break; |
604 | } | ||
612 | 605 | ||
613 | bio = loop_get_bio(lo); | 606 | bio = loop_get_bio(lo); |
614 | if (!bio) { | 607 | lo->lo_pending--; |
615 | printk("loop: missing bio\n"); | 608 | pending = lo->lo_pending; |
616 | continue; | 609 | spin_unlock_irq(&lo->lo_lock); |
617 | } | 610 | |
611 | BUG_ON(!bio); | ||
618 | loop_handle_bio(lo, bio); | 612 | loop_handle_bio(lo, bio); |
619 | 613 | ||
620 | /* | 614 | /* |
621 | * upped both for pending work and tear-down, lo_pending | 615 | * upped both for pending work and tear-down, lo_pending |
622 | * will hit zero then | 616 | * will hit zero then |
623 | */ | 617 | */ |
624 | if (atomic_dec_and_test(&lo->lo_pending)) | 618 | if (unlikely(!pending)) |
625 | break; | 619 | break; |
626 | } | 620 | } |
627 | 621 | ||
@@ -900,7 +894,8 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) | |||
900 | 894 | ||
901 | spin_lock_irq(&lo->lo_lock); | 895 | spin_lock_irq(&lo->lo_lock); |
902 | lo->lo_state = Lo_rundown; | 896 | lo->lo_state = Lo_rundown; |
903 | if (atomic_dec_and_test(&lo->lo_pending)) | 897 | lo->lo_pending--; |
898 | if (!lo->lo_pending) | ||
904 | up(&lo->lo_bh_mutex); | 899 | up(&lo->lo_bh_mutex); |
905 | spin_unlock_irq(&lo->lo_lock); | 900 | spin_unlock_irq(&lo->lo_lock); |
906 | 901 | ||