diff options
author | Nick Piggin <nickpiggin@yahoo.com.au> | 2005-08-03 06:24:01 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-08-03 12:12:05 -0400 |
commit | f33ea7f404e592e4563b12101b7a4d17da6558d7 (patch) | |
tree | 1d587ad8a06cb6d2e3a187f0312c8a524ffefe53 /mm/memory.c | |
parent | 5cb4cc0d8211c490537c8568001958fc76741312 (diff) |
[PATCH] fix get_user_pages bug
Checking pte_dirty instead of pte_write in __follow_page is problematic
for s390, and for copy_one_pte, which leaves the pte dirty when clearing write.
So revert __follow_page to check pte_write as before, and make
do_wp_page pass back a special extra VM_FAULT_WRITE bit to say it has
done its full job: once get_user_pages receives this value, it no longer
requires pte_write in __follow_page.
But most callers of handle_mm_fault, in the various architectures, have
switch statements which do not expect this new case. To avoid changing
them all in a hurry, make an inline wrapper function (using the old
name) that masks off the new bit, and have get_user_pages use the extended
interface, which is named with double underscores (__handle_mm_fault).
Yes, we do have a call to do_wp_page from do_swap_page, but there is no need
to change that: in the rare case it's needed, another do_wp_page will follow.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
[ Cleanups by Nick Piggin ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 31 |
1 file changed, 23 insertions, 8 deletions
diff --git a/mm/memory.c b/mm/memory.c index 2405289dfdf8..81d7117aa58b 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -811,15 +811,18 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address, | |||
811 | pte = *ptep; | 811 | pte = *ptep; |
812 | pte_unmap(ptep); | 812 | pte_unmap(ptep); |
813 | if (pte_present(pte)) { | 813 | if (pte_present(pte)) { |
814 | if (write && !pte_dirty(pte)) | 814 | if (write && !pte_write(pte)) |
815 | goto out; | 815 | goto out; |
816 | if (read && !pte_read(pte)) | 816 | if (read && !pte_read(pte)) |
817 | goto out; | 817 | goto out; |
818 | pfn = pte_pfn(pte); | 818 | pfn = pte_pfn(pte); |
819 | if (pfn_valid(pfn)) { | 819 | if (pfn_valid(pfn)) { |
820 | page = pfn_to_page(pfn); | 820 | page = pfn_to_page(pfn); |
821 | if (accessed) | 821 | if (accessed) { |
822 | if (write && !pte_dirty(pte) &&!PageDirty(page)) | ||
823 | set_page_dirty(page); | ||
822 | mark_page_accessed(page); | 824 | mark_page_accessed(page); |
825 | } | ||
823 | return page; | 826 | return page; |
824 | } | 827 | } |
825 | } | 828 | } |
@@ -941,10 +944,11 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
941 | } | 944 | } |
942 | spin_lock(&mm->page_table_lock); | 945 | spin_lock(&mm->page_table_lock); |
943 | do { | 946 | do { |
947 | int write_access = write; | ||
944 | struct page *page; | 948 | struct page *page; |
945 | 949 | ||
946 | cond_resched_lock(&mm->page_table_lock); | 950 | cond_resched_lock(&mm->page_table_lock); |
947 | while (!(page = follow_page(mm, start, write))) { | 951 | while (!(page = follow_page(mm, start, write_access))) { |
948 | /* | 952 | /* |
949 | * Shortcut for anonymous pages. We don't want | 953 | * Shortcut for anonymous pages. We don't want |
950 | * to force the creation of pages tables for | 954 | * to force the creation of pages tables for |
@@ -957,7 +961,16 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
957 | break; | 961 | break; |
958 | } | 962 | } |
959 | spin_unlock(&mm->page_table_lock); | 963 | spin_unlock(&mm->page_table_lock); |
960 | switch (handle_mm_fault(mm,vma,start,write)) { | 964 | switch (__handle_mm_fault(mm, vma, start, |
965 | write_access)) { | ||
966 | case VM_FAULT_WRITE: | ||
967 | /* | ||
968 | * do_wp_page has broken COW when | ||
969 | * necessary, even if maybe_mkwrite | ||
970 | * decided not to set pte_write | ||
971 | */ | ||
972 | write_access = 0; | ||
973 | /* FALLTHRU */ | ||
961 | case VM_FAULT_MINOR: | 974 | case VM_FAULT_MINOR: |
962 | tsk->min_flt++; | 975 | tsk->min_flt++; |
963 | break; | 976 | break; |
@@ -1220,6 +1233,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, | |||
1220 | struct page *old_page, *new_page; | 1233 | struct page *old_page, *new_page; |
1221 | unsigned long pfn = pte_pfn(pte); | 1234 | unsigned long pfn = pte_pfn(pte); |
1222 | pte_t entry; | 1235 | pte_t entry; |
1236 | int ret; | ||
1223 | 1237 | ||
1224 | if (unlikely(!pfn_valid(pfn))) { | 1238 | if (unlikely(!pfn_valid(pfn))) { |
1225 | /* | 1239 | /* |
@@ -1247,7 +1261,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, | |||
1247 | lazy_mmu_prot_update(entry); | 1261 | lazy_mmu_prot_update(entry); |
1248 | pte_unmap(page_table); | 1262 | pte_unmap(page_table); |
1249 | spin_unlock(&mm->page_table_lock); | 1263 | spin_unlock(&mm->page_table_lock); |
1250 | return VM_FAULT_MINOR; | 1264 | return VM_FAULT_MINOR|VM_FAULT_WRITE; |
1251 | } | 1265 | } |
1252 | } | 1266 | } |
1253 | pte_unmap(page_table); | 1267 | pte_unmap(page_table); |
@@ -1274,6 +1288,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, | |||
1274 | /* | 1288 | /* |
1275 | * Re-check the pte - we dropped the lock | 1289 | * Re-check the pte - we dropped the lock |
1276 | */ | 1290 | */ |
1291 | ret = VM_FAULT_MINOR; | ||
1277 | spin_lock(&mm->page_table_lock); | 1292 | spin_lock(&mm->page_table_lock); |
1278 | page_table = pte_offset_map(pmd, address); | 1293 | page_table = pte_offset_map(pmd, address); |
1279 | if (likely(pte_same(*page_table, pte))) { | 1294 | if (likely(pte_same(*page_table, pte))) { |
@@ -1290,12 +1305,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, | |||
1290 | 1305 | ||
1291 | /* Free the old page.. */ | 1306 | /* Free the old page.. */ |
1292 | new_page = old_page; | 1307 | new_page = old_page; |
1308 | ret |= VM_FAULT_WRITE; | ||
1293 | } | 1309 | } |
1294 | pte_unmap(page_table); | 1310 | pte_unmap(page_table); |
1295 | page_cache_release(new_page); | 1311 | page_cache_release(new_page); |
1296 | page_cache_release(old_page); | 1312 | page_cache_release(old_page); |
1297 | spin_unlock(&mm->page_table_lock); | 1313 | spin_unlock(&mm->page_table_lock); |
1298 | return VM_FAULT_MINOR; | 1314 | return ret; |
1299 | 1315 | ||
1300 | no_new_page: | 1316 | no_new_page: |
1301 | page_cache_release(old_page); | 1317 | page_cache_release(old_page); |
@@ -1987,7 +2003,6 @@ static inline int handle_pte_fault(struct mm_struct *mm, | |||
1987 | if (write_access) { | 2003 | if (write_access) { |
1988 | if (!pte_write(entry)) | 2004 | if (!pte_write(entry)) |
1989 | return do_wp_page(mm, vma, address, pte, pmd, entry); | 2005 | return do_wp_page(mm, vma, address, pte, pmd, entry); |
1990 | |||
1991 | entry = pte_mkdirty(entry); | 2006 | entry = pte_mkdirty(entry); |
1992 | } | 2007 | } |
1993 | entry = pte_mkyoung(entry); | 2008 | entry = pte_mkyoung(entry); |
@@ -2002,7 +2017,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, | |||
2002 | /* | 2017 | /* |
2003 | * By the time we get here, we already hold the mm semaphore | 2018 | * By the time we get here, we already hold the mm semaphore |
2004 | */ | 2019 | */ |
2005 | int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, | 2020 | int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, |
2006 | unsigned long address, int write_access) | 2021 | unsigned long address, int write_access) |
2007 | { | 2022 | { |
2008 | pgd_t *pgd; | 2023 | pgd_t *pgd; |