diff options
author | Nick Piggin <npiggin@suse.de> | 2008-07-25 22:45:31 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-26 15:00:06 -0400 |
commit | a60637c85893e7191faaafa6a72e197c24386727 (patch) | |
tree | fa3ec63f505e64d3b4a2be4efd9a5314ab5f6234 /mm | |
parent | e286781d5f2e9c846e012a39653a166e9d31777d (diff) |
mm: lockless pagecache
Combine page_cache_get_speculative with lockless radix tree lookups to
introduce lockless page cache lookups (ie. no mapping->tree_lock on the
read-side).
The only atomicity change this introduces is that the gang pagecache
lookup functions now behave as if they are implemented with multiple
find_get_page calls, rather than operating on a snapshot of the pages. In
practice, this atomicity guarantee is not used anyway, and it is intended to
replace individual lookups, so these semantics are natural.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 179 |
1 files changed, 134 insertions, 45 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 4e182a9a14c0..feb8448d8618 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -637,15 +637,35 @@ void __lock_page_nosync(struct page *page) | |||
637 | * Is there a pagecache struct page at the given (mapping, offset) tuple? | 637 | * Is there a pagecache struct page at the given (mapping, offset) tuple? |
638 | * If yes, increment its refcount and return it; if no, return NULL. | 638 | * If yes, increment its refcount and return it; if no, return NULL. |
639 | */ | 639 | */ |
640 | struct page * find_get_page(struct address_space *mapping, pgoff_t offset) | 640 | struct page *find_get_page(struct address_space *mapping, pgoff_t offset) |
641 | { | 641 | { |
642 | void **pagep; | ||
642 | struct page *page; | 643 | struct page *page; |
643 | 644 | ||
644 | read_lock_irq(&mapping->tree_lock); | 645 | rcu_read_lock(); |
645 | page = radix_tree_lookup(&mapping->page_tree, offset); | 646 | repeat: |
646 | if (page) | 647 | page = NULL; |
647 | page_cache_get(page); | 648 | pagep = radix_tree_lookup_slot(&mapping->page_tree, offset); |
648 | read_unlock_irq(&mapping->tree_lock); | 649 | if (pagep) { |
650 | page = radix_tree_deref_slot(pagep); | ||
651 | if (unlikely(!page || page == RADIX_TREE_RETRY)) | ||
652 | goto repeat; | ||
653 | |||
654 | if (!page_cache_get_speculative(page)) | ||
655 | goto repeat; | ||
656 | |||
657 | /* | ||
658 | * Has the page moved? | ||
659 | * This is part of the lockless pagecache protocol. See | ||
660 | * include/linux/pagemap.h for details. | ||
661 | */ | ||
662 | if (unlikely(page != *pagep)) { | ||
663 | page_cache_release(page); | ||
664 | goto repeat; | ||
665 | } | ||
666 | } | ||
667 | rcu_read_unlock(); | ||
668 | |||
649 | return page; | 669 | return page; |
650 | } | 670 | } |
651 | EXPORT_SYMBOL(find_get_page); | 671 | EXPORT_SYMBOL(find_get_page); |
@@ -660,32 +680,22 @@ EXPORT_SYMBOL(find_get_page); | |||
660 | * | 680 | * |
661 | * Returns zero if the page was not present. find_lock_page() may sleep. | 681 | * Returns zero if the page was not present. find_lock_page() may sleep. |
662 | */ | 682 | */ |
663 | struct page *find_lock_page(struct address_space *mapping, | 683 | struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) |
664 | pgoff_t offset) | ||
665 | { | 684 | { |
666 | struct page *page; | 685 | struct page *page; |
667 | 686 | ||
668 | repeat: | 687 | repeat: |
669 | read_lock_irq(&mapping->tree_lock); | 688 | page = find_get_page(mapping, offset); |
670 | page = radix_tree_lookup(&mapping->page_tree, offset); | ||
671 | if (page) { | 689 | if (page) { |
672 | page_cache_get(page); | 690 | lock_page(page); |
673 | if (TestSetPageLocked(page)) { | 691 | /* Has the page been truncated? */ |
674 | read_unlock_irq(&mapping->tree_lock); | 692 | if (unlikely(page->mapping != mapping)) { |
675 | __lock_page(page); | 693 | unlock_page(page); |
676 | 694 | page_cache_release(page); | |
677 | /* Has the page been truncated while we slept? */ | 695 | goto repeat; |
678 | if (unlikely(page->mapping != mapping)) { | ||
679 | unlock_page(page); | ||
680 | page_cache_release(page); | ||
681 | goto repeat; | ||
682 | } | ||
683 | VM_BUG_ON(page->index != offset); | ||
684 | goto out; | ||
685 | } | 696 | } |
697 | VM_BUG_ON(page->index != offset); | ||
686 | } | 698 | } |
687 | read_unlock_irq(&mapping->tree_lock); | ||
688 | out: | ||
689 | return page; | 699 | return page; |
690 | } | 700 | } |
691 | EXPORT_SYMBOL(find_lock_page); | 701 | EXPORT_SYMBOL(find_lock_page); |
@@ -751,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, | |||
751 | { | 761 | { |
752 | unsigned int i; | 762 | unsigned int i; |
753 | unsigned int ret; | 763 | unsigned int ret; |
764 | unsigned int nr_found; | ||
765 | |||
766 | rcu_read_lock(); | ||
767 | restart: | ||
768 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, | ||
769 | (void ***)pages, start, nr_pages); | ||
770 | ret = 0; | ||
771 | for (i = 0; i < nr_found; i++) { | ||
772 | struct page *page; | ||
773 | repeat: | ||
774 | page = radix_tree_deref_slot((void **)pages[i]); | ||
775 | if (unlikely(!page)) | ||
776 | continue; | ||
777 | /* | ||
778 | * this can only trigger if nr_found == 1, making livelock | ||
779 | * a non issue. | ||
780 | */ | ||
781 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
782 | goto restart; | ||
783 | |||
784 | if (!page_cache_get_speculative(page)) | ||
785 | goto repeat; | ||
754 | 786 | ||
755 | read_lock_irq(&mapping->tree_lock); | 787 | /* Has the page moved? */ |
756 | ret = radix_tree_gang_lookup(&mapping->page_tree, | 788 | if (unlikely(page != *((void **)pages[i]))) { |
757 | (void **)pages, start, nr_pages); | 789 | page_cache_release(page); |
758 | for (i = 0; i < ret; i++) | 790 | goto repeat; |
759 | page_cache_get(pages[i]); | 791 | } |
760 | read_unlock_irq(&mapping->tree_lock); | 792 | |
793 | pages[ret] = page; | ||
794 | ret++; | ||
795 | } | ||
796 | rcu_read_unlock(); | ||
761 | return ret; | 797 | return ret; |
762 | } | 798 | } |
763 | 799 | ||
@@ -778,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, | |||
778 | { | 814 | { |
779 | unsigned int i; | 815 | unsigned int i; |
780 | unsigned int ret; | 816 | unsigned int ret; |
817 | unsigned int nr_found; | ||
818 | |||
819 | rcu_read_lock(); | ||
820 | restart: | ||
821 | nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, | ||
822 | (void ***)pages, index, nr_pages); | ||
823 | ret = 0; | ||
824 | for (i = 0; i < nr_found; i++) { | ||
825 | struct page *page; | ||
826 | repeat: | ||
827 | page = radix_tree_deref_slot((void **)pages[i]); | ||
828 | if (unlikely(!page)) | ||
829 | continue; | ||
830 | /* | ||
831 | * this can only trigger if nr_found == 1, making livelock | ||
832 | * a non issue. | ||
833 | */ | ||
834 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
835 | goto restart; | ||
781 | 836 | ||
782 | read_lock_irq(&mapping->tree_lock); | 837 | if (page->mapping == NULL || page->index != index) |
783 | ret = radix_tree_gang_lookup(&mapping->page_tree, | ||
784 | (void **)pages, index, nr_pages); | ||
785 | for (i = 0; i < ret; i++) { | ||
786 | if (pages[i]->mapping == NULL || pages[i]->index != index) | ||
787 | break; | 838 | break; |
788 | 839 | ||
789 | page_cache_get(pages[i]); | 840 | if (!page_cache_get_speculative(page)) |
841 | goto repeat; | ||
842 | |||
843 | /* Has the page moved? */ | ||
844 | if (unlikely(page != *((void **)pages[i]))) { | ||
845 | page_cache_release(page); | ||
846 | goto repeat; | ||
847 | } | ||
848 | |||
849 | pages[ret] = page; | ||
850 | ret++; | ||
790 | index++; | 851 | index++; |
791 | } | 852 | } |
792 | read_unlock_irq(&mapping->tree_lock); | 853 | rcu_read_unlock(); |
793 | return i; | 854 | return ret; |
794 | } | 855 | } |
795 | EXPORT_SYMBOL(find_get_pages_contig); | 856 | EXPORT_SYMBOL(find_get_pages_contig); |
796 | 857 | ||
@@ -810,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, | |||
810 | { | 871 | { |
811 | unsigned int i; | 872 | unsigned int i; |
812 | unsigned int ret; | 873 | unsigned int ret; |
874 | unsigned int nr_found; | ||
875 | |||
876 | rcu_read_lock(); | ||
877 | restart: | ||
878 | nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree, | ||
879 | (void ***)pages, *index, nr_pages, tag); | ||
880 | ret = 0; | ||
881 | for (i = 0; i < nr_found; i++) { | ||
882 | struct page *page; | ||
883 | repeat: | ||
884 | page = radix_tree_deref_slot((void **)pages[i]); | ||
885 | if (unlikely(!page)) | ||
886 | continue; | ||
887 | /* | ||
888 | * this can only trigger if nr_found == 1, making livelock | ||
889 | * a non issue. | ||
890 | */ | ||
891 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
892 | goto restart; | ||
893 | |||
894 | if (!page_cache_get_speculative(page)) | ||
895 | goto repeat; | ||
896 | |||
897 | /* Has the page moved? */ | ||
898 | if (unlikely(page != *((void **)pages[i]))) { | ||
899 | page_cache_release(page); | ||
900 | goto repeat; | ||
901 | } | ||
902 | |||
903 | pages[ret] = page; | ||
904 | ret++; | ||
905 | } | ||
906 | rcu_read_unlock(); | ||
813 | 907 | ||
814 | read_lock_irq(&mapping->tree_lock); | ||
815 | ret = radix_tree_gang_lookup_tag(&mapping->page_tree, | ||
816 | (void **)pages, *index, nr_pages, tag); | ||
817 | for (i = 0; i < ret; i++) | ||
818 | page_cache_get(pages[i]); | ||
819 | if (ret) | 908 | if (ret) |
820 | *index = pages[ret - 1]->index + 1; | 909 | *index = pages[ret - 1]->index + 1; |
821 | read_unlock_irq(&mapping->tree_lock); | 910 | |
822 | return ret; | 911 | return ret; |
823 | } | 912 | } |
824 | EXPORT_SYMBOL(find_get_pages_tag); | 913 | EXPORT_SYMBOL(find_get_pages_tag); |