diff options
author | Nick Piggin <npiggin@kernel.dk> | 2010-11-11 17:05:19 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-11-12 10:55:32 -0500 |
commit | 27d20fddc8af539464fc3ba499d6a830054c3bd6 (patch) | |
tree | 23514cfe88f90150a8635c47586a8a378fb905e3 /mm | |
parent | eaf06b241b091357e72b76863ba16e89610d31bd (diff) |
radix-tree: fix RCU bug
Salman Qazi describes the following radix-tree bug:
In the following case, we get can get a deadlock:
0. The radix tree contains two items, one has the index 0.
1. The reader (in this case find_get_pages) takes the rcu_read_lock.
2. The reader acquires slot(s) for item(s) including the index 0 item.
3. The non-zero index item is deleted, and as a consequence the other item is
moved to the root of the tree. The place where it used to be is queued for
deletion after the readers finish.
3b. The zero item is deleted, removing it from the direct slot, it remains in
the rcu-delayed indirect node.
4. The reader looks at the index 0 slot, and finds that the page has 0 ref
count
5. The reader looks at it again, hoping that the item will either be freed or
the ref count will increase. This never happens, as the slot it is looking
at will never be updated. Also, this slot can never be reclaimed because
the reader is holding rcu_read_lock and is in an infinite loop.
The fix is to re-use the same "indirect" pointer case that requires a slot
lookup retry into a general "retry the lookup" bit.
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Reported-by: Salman Qazi <sqazi@google.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 26 |
1 files changed, 10 insertions, 16 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 4ee2e998e937..ea89840fc65f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -644,7 +644,9 @@ repeat: | |||
644 | pagep = radix_tree_lookup_slot(&mapping->page_tree, offset); | 644 | pagep = radix_tree_lookup_slot(&mapping->page_tree, offset); |
645 | if (pagep) { | 645 | if (pagep) { |
646 | page = radix_tree_deref_slot(pagep); | 646 | page = radix_tree_deref_slot(pagep); |
647 | if (unlikely(!page || page == RADIX_TREE_RETRY)) | 647 | if (unlikely(!page)) |
648 | goto out; | ||
649 | if (radix_tree_deref_retry(page)) | ||
648 | goto repeat; | 650 | goto repeat; |
649 | 651 | ||
650 | if (!page_cache_get_speculative(page)) | 652 | if (!page_cache_get_speculative(page)) |
@@ -660,6 +662,7 @@ repeat: | |||
660 | goto repeat; | 662 | goto repeat; |
661 | } | 663 | } |
662 | } | 664 | } |
665 | out: | ||
663 | rcu_read_unlock(); | 666 | rcu_read_unlock(); |
664 | 667 | ||
665 | return page; | 668 | return page; |
@@ -777,12 +780,11 @@ repeat: | |||
777 | page = radix_tree_deref_slot((void **)pages[i]); | 780 | page = radix_tree_deref_slot((void **)pages[i]); |
778 | if (unlikely(!page)) | 781 | if (unlikely(!page)) |
779 | continue; | 782 | continue; |
780 | /* | 783 | if (radix_tree_deref_retry(page)) { |
781 | * this can only trigger if nr_found == 1, making livelock | 784 | if (ret) |
782 | * a non issue. | 785 | start = pages[ret-1]->index; |
783 | */ | ||
784 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
785 | goto restart; | 786 | goto restart; |
787 | } | ||
786 | 788 | ||
787 | if (!page_cache_get_speculative(page)) | 789 | if (!page_cache_get_speculative(page)) |
788 | goto repeat; | 790 | goto repeat; |
@@ -830,11 +832,7 @@ repeat: | |||
830 | page = radix_tree_deref_slot((void **)pages[i]); | 832 | page = radix_tree_deref_slot((void **)pages[i]); |
831 | if (unlikely(!page)) | 833 | if (unlikely(!page)) |
832 | continue; | 834 | continue; |
833 | /* | 835 | if (radix_tree_deref_retry(page)) |
834 | * this can only trigger if nr_found == 1, making livelock | ||
835 | * a non issue. | ||
836 | */ | ||
837 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
838 | goto restart; | 836 | goto restart; |
839 | 837 | ||
840 | if (page->mapping == NULL || page->index != index) | 838 | if (page->mapping == NULL || page->index != index) |
@@ -887,11 +885,7 @@ repeat: | |||
887 | page = radix_tree_deref_slot((void **)pages[i]); | 885 | page = radix_tree_deref_slot((void **)pages[i]); |
888 | if (unlikely(!page)) | 886 | if (unlikely(!page)) |
889 | continue; | 887 | continue; |
890 | /* | 888 | if (radix_tree_deref_retry(page)) |
891 | * this can only trigger if nr_found == 1, making livelock | ||
892 | * a non issue. | ||
893 | */ | ||
894 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
895 | goto restart; | 889 | goto restart; |
896 | 890 | ||
897 | if (!page_cache_get_speculative(page)) | 891 | if (!page_cache_get_speculative(page)) |