aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@g5.osdl.org> 2005-08-01 14:14:49 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org> 2005-08-01 14:14:49 -0400
commit: 4ceb5db9757aaeadcf8fbbf97d76bd42aa4df0d6 (patch)
tree: 6a3108ceea457c21130838d49736f5e9de3badc3
parent: 8d894c47975f7222c5537e450e71310b395488c7 (diff)
Fix get_user_pages() race for write access
There's no real guarantee that handle_mm_fault() will always be able to break a COW situation - if an update from another thread ends up modifying the page table some way, handle_mm_fault() may end up requiring us to re-try the operation.

That's normally fine, but get_user_pages() ended up re-trying it as a read, and thus a write access could in theory end up losing the dirty bit or be done on a page that had not been properly COW'ed.

This makes get_user_pages() always retry write accesses as write accesses by making "follow_page()" require that a writable follow has the dirty bit set. That simplifies the code and solves the race: if the COW break fails for some reason, we'll just loop around and try again.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- mm/memory.c 21
1 files changed, 4 insertions, 17 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 6fe77acbc1cd..4e1c673784db 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -811,18 +811,15 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
811 pte = *ptep; 811 pte = *ptep;
812 pte_unmap(ptep); 812 pte_unmap(ptep);
813 if (pte_present(pte)) { 813 if (pte_present(pte)) {
814 if (write && !pte_write(pte)) 814 if (write && !pte_dirty(pte))
815 goto out; 815 goto out;
816 if (read && !pte_read(pte)) 816 if (read && !pte_read(pte))
817 goto out; 817 goto out;
818 pfn = pte_pfn(pte); 818 pfn = pte_pfn(pte);
819 if (pfn_valid(pfn)) { 819 if (pfn_valid(pfn)) {
820 page = pfn_to_page(pfn); 820 page = pfn_to_page(pfn);
821 if (accessed) { 821 if (accessed)
822 if (write && !pte_dirty(pte) &&!PageDirty(page))
823 set_page_dirty(page);
824 mark_page_accessed(page); 822 mark_page_accessed(page);
825 }
826 return page; 823 return page;
827 } 824 }
828 } 825 }
@@ -941,10 +938,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
941 spin_lock(&mm->page_table_lock); 938 spin_lock(&mm->page_table_lock);
942 do { 939 do {
943 struct page *page; 940 struct page *page;
944 int lookup_write = write;
945 941
946 cond_resched_lock(&mm->page_table_lock); 942 cond_resched_lock(&mm->page_table_lock);
947 while (!(page = follow_page(mm, start, lookup_write))) { 943 while (!(page = follow_page(mm, start, write))) {
948 /* 944 /*
949 * Shortcut for anonymous pages. We don't want 945 * Shortcut for anonymous pages. We don't want
950 * to force the creation of pages tables for 946 * to force the creation of pages tables for
@@ -952,8 +948,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
952 * nobody touched so far. This is important 948 * nobody touched so far. This is important
953 * for doing a core dump for these mappings. 949 * for doing a core dump for these mappings.
954 */ 950 */
955 if (!lookup_write && 951 if (!write && untouched_anonymous_page(mm,vma,start)) {
956 untouched_anonymous_page(mm,vma,start)) {
957 page = ZERO_PAGE(start); 952 page = ZERO_PAGE(start);
958 break; 953 break;
959 } 954 }
@@ -972,14 +967,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
972 default: 967 default:
973 BUG(); 968 BUG();
974 } 969 }
975 /*
976 * Now that we have performed a write fault
977 * and surely no longer have a shared page we
978 * shouldn't write, we shouldn't ignore an
979 * unwritable page in the page table if
980 * we are forcing write access.
981 */
982 lookup_write = write && !force;
983 spin_lock(&mm->page_table_lock); 970 spin_lock(&mm->page_table_lock);
984 } 971 }
985 if (pages) { 972 if (pages) {