author     Nick Piggin <nickpiggin@yahoo.com.au>    2005-08-03 06:24:01 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>    2005-08-03 12:12:05 -0400
commit     f33ea7f404e592e4563b12101b7a4d17da6558d7
tree       1d587ad8a06cb6d2e3a187f0312c8a524ffefe53
parent     5cb4cc0d8211c490537c8568001958fc76741312
[PATCH] fix get_user_pages bug
Checking pte_dirty instead of pte_write in __follow_page is problematic
for s390, and for copy_one_pte which leaves dirty when clearing write.
So revert __follow_page to check pte_write as before, and make
do_wp_page pass back a special extra VM_FAULT_WRITE bit to say it has
done its full job: once get_user_pages receives this value, it no longer
requires pte_write in __follow_page.
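An illustrative aside (not part of the patch): the renumbering in the
include/linux/mm.h hunk below is what makes the extra bit workable. With the
old values (-1, 0, 1, 2) there is no clean spare bit to OR into a return code
(OOM was -1, all bits set); with 0x00..0x03 plus a reserved 0x10, do_wp_page
can report that it has broken COW on top of its normal result. A minimal
user-space sketch, using the constant values from that hunk:

    #include <assert.h>

    /* Values as defined in the include/linux/mm.h hunk below. */
    #define VM_FAULT_MINOR  0x02
    #define VM_FAULT_WRITE  0x10

    int main(void)
    {
            /* What do_wp_page returns after breaking COW for a minor fault. */
            int ret = VM_FAULT_MINOR | VM_FAULT_WRITE;

            assert(ret & VM_FAULT_WRITE);                      /* extra bit is visible */
            assert((ret & ~VM_FAULT_WRITE) == VM_FAULT_MINOR); /* base code survives   */
            return 0;
    }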
But most callers of handle_mm_fault, in the various architectures, have
switch statements which do not expect this new case. To avoid changing
them all in a hurry, make an inline wrapper function (using the old
name) that masks off the new bit, and use the extended interface with
double underscores.
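For context: those architecture fault handlers switch directly on the return
value and BUG() on anything they do not recognise, so a value with the new bit
OR'd in must never reach them; the masking wrapper guarantees that. A
self-contained sketch of the pattern (illustrative only, not copied from any
particular arch/*/mm/fault.c; classify() stands in for the real switch):

    #include <stdio.h>

    /* Values as defined in the include/linux/mm.h hunk below. */
    #define VM_FAULT_OOM    0x00
    #define VM_FAULT_SIGBUS 0x01
    #define VM_FAULT_MINOR  0x02
    #define VM_FAULT_MAJOR  0x03
    #define VM_FAULT_WRITE  0x10

    /* Stand-in for the switch in an unconverted architecture fault handler
     * (the real ones bump min_flt/maj_flt, send SIGBUS, go out-of-memory...). */
    static const char *classify(int ret)
    {
            switch (ret) {
            case VM_FAULT_MINOR:  return "minor fault";
            case VM_FAULT_MAJOR:  return "major fault";
            case VM_FAULT_SIGBUS: return "SIGBUS";
            case VM_FAULT_OOM:    return "out of memory";
            default:              return "unexpected -> BUG()";
            }
    }

    int main(void)
    {
            /* A value __handle_mm_fault can now return via do_wp_page. */
            int raw = VM_FAULT_MINOR | VM_FAULT_WRITE;

            printf("raw:    %s\n", classify(raw));                   /* hits default */
            printf("masked: %s\n", classify(raw & ~VM_FAULT_WRITE)); /* minor fault  */
            return 0;
    }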
Yes, we do have a call to do_wp_page from do_swap_page, but no need to
change that: in the rare case it's needed, another do_wp_page will follow.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
[ Cleanups by Nick Piggin ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--   include/linux/mm.h   22
-rw-r--r--   mm/memory.c          31
2 files changed, 40 insertions, 13 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6eb7f48317f8..82d7024f0765 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -625,10 +625,16 @@ static inline int page_mapped(struct page *page)
  * Used to decide whether a process gets delivered SIGBUS or
  * just gets major/minor fault counters bumped up.
  */
-#define VM_FAULT_OOM	(-1)
-#define VM_FAULT_SIGBUS	0
-#define VM_FAULT_MINOR	1
-#define VM_FAULT_MAJOR	2
+#define VM_FAULT_OOM	0x00
+#define VM_FAULT_SIGBUS	0x01
+#define VM_FAULT_MINOR	0x02
+#define VM_FAULT_MAJOR	0x03
+
+/*
+ * Special case for get_user_pages.
+ * Must be in a distinct bit from the above VM_FAULT_ flags.
+ */
+#define VM_FAULT_WRITE	0x10
 
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
@@ -704,7 +710,13 @@ extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsign
 extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
 extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
-extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
+extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
+
+static inline int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
+{
+	return __handle_mm_fault(mm, vma, address, write_access) & (~VM_FAULT_WRITE);
+}
+
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
diff --git a/mm/memory.c b/mm/memory.c
index 2405289dfdf8..81d7117aa58b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -811,15 +811,18 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
 	pte = *ptep;
 	pte_unmap(ptep);
 	if (pte_present(pte)) {
-		if (write && !pte_dirty(pte))
+		if (write && !pte_write(pte))
 			goto out;
 		if (read && !pte_read(pte))
 			goto out;
 		pfn = pte_pfn(pte);
 		if (pfn_valid(pfn)) {
 			page = pfn_to_page(pfn);
-			if (accessed)
+			if (accessed) {
+				if (write && !pte_dirty(pte) && !PageDirty(page))
+					set_page_dirty(page);
 				mark_page_accessed(page);
+			}
 			return page;
 		}
 	}
@@ -941,10 +944,11 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		}
 		spin_lock(&mm->page_table_lock);
 		do {
+			int write_access = write;
 			struct page *page;
 
 			cond_resched_lock(&mm->page_table_lock);
-			while (!(page = follow_page(mm, start, write))) {
+			while (!(page = follow_page(mm, start, write_access))) {
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of pages tables for
@@ -957,7 +961,16 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 					break;
 				}
 				spin_unlock(&mm->page_table_lock);
-				switch (handle_mm_fault(mm,vma,start,write)) {
+				switch (__handle_mm_fault(mm, vma, start,
+						write_access)) {
+				case VM_FAULT_WRITE:
+					/*
+					 * do_wp_page has broken COW when
+					 * necessary, even if maybe_mkwrite
+					 * decided not to set pte_write
+					 */
+					write_access = 0;
+					/* FALLTHRU */
 				case VM_FAULT_MINOR:
 					tsk->min_flt++;
 					break;
@@ -1220,6 +1233,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	struct page *old_page, *new_page;
 	unsigned long pfn = pte_pfn(pte);
 	pte_t entry;
+	int ret;
 
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
@@ -1247,7 +1261,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 			lazy_mmu_prot_update(entry);
 			pte_unmap(page_table);
 			spin_unlock(&mm->page_table_lock);
-			return VM_FAULT_MINOR;
+			return VM_FAULT_MINOR|VM_FAULT_WRITE;
 		}
 	}
 	pte_unmap(page_table);
@@ -1274,6 +1288,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
+	ret = VM_FAULT_MINOR;
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, pte))) {
@@ -1290,12 +1305,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 
 		/* Free the old page.. */
 		new_page = old_page;
+		ret |= VM_FAULT_WRITE;
 	}
 	pte_unmap(page_table);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 	spin_unlock(&mm->page_table_lock);
-	return VM_FAULT_MINOR;
+	return ret;
 
 no_new_page:
 	page_cache_release(old_page);
@@ -1987,7 +2003,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	if (write_access) {
 		if (!pte_write(entry))
 			return do_wp_page(mm, vma, address, pte, pmd, entry);
-
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
@@ -2002,7 +2017,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 		unsigned long address, int write_access)
 {
 	pgd_t *pgd;