| author | Linus Torvalds <torvalds@g5.osdl.org> | 2005-08-01 14:14:49 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-08-01 14:14:49 -0400 |
| commit | 4ceb5db9757aaeadcf8fbbf97d76bd42aa4df0d6 | |
| tree | 6a3108ceea457c21130838d49736f5e9de3badc3 | |
| parent | 8d894c47975f7222c5537e450e71310b395488c7 | |
Fix get_user_pages() race for write access
There's no real guarantee that handle_mm_fault() will always be able to
break a COW situation - if an update from another thread ends up
modifying the page table in some way, handle_mm_fault() may end up
requiring us to re-try the operation.
That's normally fine, but get_user_pages() ended up re-trying it as a
read, and thus a write access could in theory end up losing the dirty
bit or be done on a page that had not been properly COW'ed.
This makes get_user_pages() always retry write accesses as write
accesses by making "follow_page()" require that a writable follow has
the dirty bit set. That simplifies the code and solves the race: if the
COW break fails for some reason, we'll just loop around and try again.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | mm/memory.c | 21 |
1 file changed, 4 insertions(+), 17 deletions(-)
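To make the fixed control flow concrete, here is a small, self-contained model of the retry loop as it behaves after this patch. It is a sketch only: struct pte_model, follow_page_model() and handle_mm_fault_model() are invented stand-ins for illustration, not the real mm/memory.c interfaces, and the one assumption baked in is that a successful COW break leaves the new PTE marked dirty.

#include <stdbool.h>
#include <stdio.h>

/* Toy PTE: just the two bits this patch cares about. */
struct pte_model {
	bool writable;	/* pte_write(): hardware write permission */
	bool dirty;	/* pte_dirty(): written through this mapping */
};

/*
 * Model of follow_page() after the patch: a write lookup succeeds only
 * if the PTE is dirty, i.e. only once the COW break has really happened.
 */
static bool follow_page_model(const struct pte_model *pte, bool write)
{
	if (write && !pte->dirty)
		return false;	/* push the caller back into the fault path */
	return true;
}

/* Model of handle_mm_fault() for a write fault: break COW, PTE ends up dirty. */
static void handle_mm_fault_model(struct pte_model *pte, bool write)
{
	if (write) {
		pte->writable = true;
		pte->dirty = true;
	}
}

int main(void)
{
	struct pte_model pte = { .writable = false, .dirty = false };
	bool write = true;	/* caller wants write access */
	int faults = 0;

	/*
	 * get_user_pages() style loop: always look up with the caller's
	 * write intent and keep faulting until the lookup succeeds.
	 */
	while (!follow_page_model(&pte, write)) {
		handle_mm_fault_model(&pte, write);
		faults++;
	}
	printf("write lookup succeeded after %d fault(s)\n", faults);
	return 0;
}

Because the lookup is always retried with the original write intent, a failed COW break simply means another pass through the fault path instead of a silent downgrade to a read lookup.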
diff --git a/mm/memory.c b/mm/memory.c
index 6fe77acbc1cd..4e1c673784db 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -811,18 +811,15 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
 	pte = *ptep;
 	pte_unmap(ptep);
 	if (pte_present(pte)) {
-		if (write && !pte_write(pte))
+		if (write && !pte_dirty(pte))
 			goto out;
 		if (read && !pte_read(pte))
 			goto out;
 		pfn = pte_pfn(pte);
 		if (pfn_valid(pfn)) {
 			page = pfn_to_page(pfn);
-			if (accessed) {
-				if (write && !pte_dirty(pte) && !PageDirty(page))
-					set_page_dirty(page);
+			if (accessed)
 				mark_page_accessed(page);
-			}
 			return page;
 		}
 	}
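The pte_dirty() test in the hunk above is a safe gate for write lookups because, in this kernel, the write-fault path constructs the replacement PTE already dirty (and writable where the VMA allows it) before installing it. Roughly, as an illustrative fragment rather than a verbatim quote of the COW-break code, with new_page and vma coming from the surrounding fault-handling context:

	pte_t entry;

	/* COW break: the freshly copied page goes in with the dirty bit set. */
	entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)), vma);
	/* ... entry is then installed in the page table ... */

So a dirty PTE seen by a write-mode follow_page() proves that the COW break completed; until then, get_user_pages() keeps looping through handle_mm_fault().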
@@ -941,10 +938,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		spin_lock(&mm->page_table_lock);
 		do {
 			struct page *page;
-			int lookup_write = write;
 
 			cond_resched_lock(&mm->page_table_lock);
-			while (!(page = follow_page(mm, start, lookup_write))) {
+			while (!(page = follow_page(mm, start, write))) {
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of pages tables for
@@ -952,8 +948,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				 * nobody touched so far. This is important
 				 * for doing a core dump for these mappings.
 				 */
-				if (!lookup_write &&
-				    untouched_anonymous_page(mm,vma,start)) {
+				if (!write && untouched_anonymous_page(mm,vma,start)) {
 					page = ZERO_PAGE(start);
 					break;
 				}
@@ -972,14 +967,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			default:
 				BUG();
 			}
-			/*
-			 * Now that we have performed a write fault
-			 * and surely no longer have a shared page we
-			 * shouldn't write, we shouldn't ignore an
-			 * unwritable page in the page table if
-			 * we are forcing write access.
-			 */
-			lookup_write = write && !force;
 			spin_lock(&mm->page_table_lock);
 		}
 		if (pages) {
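For reference, a typical caller of this era's eight-argument get_user_pages() (the signature visible in the hunk headers above) looks roughly like the fragment below; it is illustrative kernel-context code, with addr standing in for whatever user address is being pinned:

	struct page *page;
	int ret;

	down_read(&current->mm->mmap_sem);
	/*
	 * Ask for one page with write access; after this fix a successful
	 * return means the page really has been COW-broken for the caller.
	 */
	ret = get_user_pages(current, current->mm, addr, 1,
			     1 /* write */, 0 /* force */, &page, NULL);
	up_read(&current->mm->mmap_sem);

	if (ret == 1) {
		/* ... use the pinned page ... */
		page_cache_release(page);
	}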