aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nick Piggin <npiggin@suse.de> 2008-07-25 22:45:31 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-07-26 15:00:06 -0400
commit a60637c85893e7191faaafa6a72e197c24386727 (patch)
tree fa3ec63f505e64d3b4a2be4efd9a5314ab5f6234
parent e286781d5f2e9c846e012a39653a166e9d31777d (diff)
mm: lockless pagecache
Combine page_cache_get_speculative with lockless radix tree lookups to introduce lockless page cache lookups (i.e. no mapping->tree_lock on the read side). The only atomicity change this introduces is that the gang pagecache lookup functions now behave as if they are implemented with multiple find_get_page calls, rather than operating on a snapshot of the pages. In practice, this atomicity guarantee is not used anyway, and gang lookup is meant to replace individual lookups, so these semantics are natural. Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Hugh Dickins <hugh@veritas.com> Cc: "Paul E. McKenney" <paulmck@us.ibm.com> Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/filemap.c 179
1 files changed, 134 insertions, 45 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 4e182a9a14c0..feb8448d8618 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -637,15 +637,35 @@ void __lock_page_nosync(struct page *page)
637 * Is there a pagecache struct page at the given (mapping, offset) tuple? 637 * Is there a pagecache struct page at the given (mapping, offset) tuple?
638 * If yes, increment its refcount and return it; if no, return NULL. 638 * If yes, increment its refcount and return it; if no, return NULL.
639 */ 639 */
640struct page * find_get_page(struct address_space *mapping, pgoff_t offset) 640struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
641{ 641{
642 void **pagep;
642 struct page *page; 643 struct page *page;
643 644
644 read_lock_irq(&mapping->tree_lock); 645 rcu_read_lock();
645 page = radix_tree_lookup(&mapping->page_tree, offset); 646repeat:
646 if (page) 647 page = NULL;
647 page_cache_get(page); 648 pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
648 read_unlock_irq(&mapping->tree_lock); 649 if (pagep) {
650 page = radix_tree_deref_slot(pagep);
651 if (unlikely(!page || page == RADIX_TREE_RETRY))
652 goto repeat;
653
654 if (!page_cache_get_speculative(page))
655 goto repeat;
656
657 /*
658 * Has the page moved?
659 * This is part of the lockless pagecache protocol. See
660 * include/linux/pagemap.h for details.
661 */
662 if (unlikely(page != *pagep)) {
663 page_cache_release(page);
664 goto repeat;
665 }
666 }
667 rcu_read_unlock();
668
649 return page; 669 return page;
650} 670}
651EXPORT_SYMBOL(find_get_page); 671EXPORT_SYMBOL(find_get_page);
@@ -660,32 +680,22 @@ EXPORT_SYMBOL(find_get_page);
660 * 680 *
661 * Returns zero if the page was not present. find_lock_page() may sleep. 681 * Returns zero if the page was not present. find_lock_page() may sleep.
662 */ 682 */
663struct page *find_lock_page(struct address_space *mapping, 683struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
664 pgoff_t offset)
665{ 684{
666 struct page *page; 685 struct page *page;
667 686
668repeat: 687repeat:
669 read_lock_irq(&mapping->tree_lock); 688 page = find_get_page(mapping, offset);
670 page = radix_tree_lookup(&mapping->page_tree, offset);
671 if (page) { 689 if (page) {
672 page_cache_get(page); 690 lock_page(page);
673 if (TestSetPageLocked(page)) { 691 /* Has the page been truncated? */
674 read_unlock_irq(&mapping->tree_lock); 692 if (unlikely(page->mapping != mapping)) {
675 __lock_page(page); 693 unlock_page(page);
676 694 page_cache_release(page);
677 /* Has the page been truncated while we slept? */ 695 goto repeat;
678 if (unlikely(page->mapping != mapping)) {
679 unlock_page(page);
680 page_cache_release(page);
681 goto repeat;
682 }
683 VM_BUG_ON(page->index != offset);
684 goto out;
685 } 696 }
697 VM_BUG_ON(page->index != offset);
686 } 698 }
687 read_unlock_irq(&mapping->tree_lock);
688out:
689 return page; 699 return page;
690} 700}
691EXPORT_SYMBOL(find_lock_page); 701EXPORT_SYMBOL(find_lock_page);
@@ -751,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
751{ 761{
752 unsigned int i; 762 unsigned int i;
753 unsigned int ret; 763 unsigned int ret;
764 unsigned int nr_found;
765
766 rcu_read_lock();
767restart:
768 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
769 (void ***)pages, start, nr_pages);
770 ret = 0;
771 for (i = 0; i < nr_found; i++) {
772 struct page *page;
773repeat:
774 page = radix_tree_deref_slot((void **)pages[i]);
775 if (unlikely(!page))
776 continue;
777 /*
778 * this can only trigger if nr_found == 1, making livelock
779 * a non issue.
780 */
781 if (unlikely(page == RADIX_TREE_RETRY))
782 goto restart;
783
784 if (!page_cache_get_speculative(page))
785 goto repeat;
754 786
755 read_lock_irq(&mapping->tree_lock); 787 /* Has the page moved? */
756 ret = radix_tree_gang_lookup(&mapping->page_tree, 788 if (unlikely(page != *((void **)pages[i]))) {
757 (void **)pages, start, nr_pages); 789 page_cache_release(page);
758 for (i = 0; i < ret; i++) 790 goto repeat;
759 page_cache_get(pages[i]); 791 }
760 read_unlock_irq(&mapping->tree_lock); 792
793 pages[ret] = page;
794 ret++;
795 }
796 rcu_read_unlock();
761 return ret; 797 return ret;
762} 798}
763 799
@@ -778,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
778{ 814{
779 unsigned int i; 815 unsigned int i;
780 unsigned int ret; 816 unsigned int ret;
817 unsigned int nr_found;
818
819 rcu_read_lock();
820restart:
821 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
822 (void ***)pages, index, nr_pages);
823 ret = 0;
824 for (i = 0; i < nr_found; i++) {
825 struct page *page;
826repeat:
827 page = radix_tree_deref_slot((void **)pages[i]);
828 if (unlikely(!page))
829 continue;
830 /*
831 * this can only trigger if nr_found == 1, making livelock
832 * a non issue.
833 */
834 if (unlikely(page == RADIX_TREE_RETRY))
835 goto restart;
781 836
782 read_lock_irq(&mapping->tree_lock); 837 if (page->mapping == NULL || page->index != index)
783 ret = radix_tree_gang_lookup(&mapping->page_tree,
784 (void **)pages, index, nr_pages);
785 for (i = 0; i < ret; i++) {
786 if (pages[i]->mapping == NULL || pages[i]->index != index)
787 break; 838 break;
788 839
789 page_cache_get(pages[i]); 840 if (!page_cache_get_speculative(page))
841 goto repeat;
842
843 /* Has the page moved? */
844 if (unlikely(page != *((void **)pages[i]))) {
845 page_cache_release(page);
846 goto repeat;
847 }
848
849 pages[ret] = page;
850 ret++;
790 index++; 851 index++;
791 } 852 }
792 read_unlock_irq(&mapping->tree_lock); 853 rcu_read_unlock();
793 return i; 854 return ret;
794} 855}
795EXPORT_SYMBOL(find_get_pages_contig); 856EXPORT_SYMBOL(find_get_pages_contig);
796 857
@@ -810,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
810{ 871{
811 unsigned int i; 872 unsigned int i;
812 unsigned int ret; 873 unsigned int ret;
874 unsigned int nr_found;
875
876 rcu_read_lock();
877restart:
878 nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
879 (void ***)pages, *index, nr_pages, tag);
880 ret = 0;
881 for (i = 0; i < nr_found; i++) {
882 struct page *page;
883repeat:
884 page = radix_tree_deref_slot((void **)pages[i]);
885 if (unlikely(!page))
886 continue;
887 /*
888 * this can only trigger if nr_found == 1, making livelock
889 * a non issue.
890 */
891 if (unlikely(page == RADIX_TREE_RETRY))
892 goto restart;
893
894 if (!page_cache_get_speculative(page))
895 goto repeat;
896
897 /* Has the page moved? */
898 if (unlikely(page != *((void **)pages[i]))) {
899 page_cache_release(page);
900 goto repeat;
901 }
902
903 pages[ret] = page;
904 ret++;
905 }
906 rcu_read_unlock();
813 907
814 read_lock_irq(&mapping->tree_lock);
815 ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
816 (void **)pages, *index, nr_pages, tag);
817 for (i = 0; i < ret; i++)
818 page_cache_get(pages[i]);
819 if (ret) 908 if (ret)
820 *index = pages[ret - 1]->index + 1; 909 *index = pages[ret - 1]->index + 1;
821 read_unlock_irq(&mapping->tree_lock); 910
822 return ret; 911 return ret;
823} 912}
824EXPORT_SYMBOL(find_get_pages_tag); 913EXPORT_SYMBOL(find_get_pages_tag);