author	Nick Piggin <nickpiggin@yahoo.com.au>	2005-08-03 06:24:01 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-08-03 12:12:05 -0400
commit	f33ea7f404e592e4563b12101b7a4d17da6558d7 (patch)
tree	1d587ad8a06cb6d2e3a187f0312c8a524ffefe53
parent	5cb4cc0d8211c490537c8568001958fc76741312 (diff)
[PATCH] fix get_user_pages bug
Checking pte_dirty instead of pte_write in __follow_page is problematic
for s390, and for copy_one_pte which leaves dirty when clearing write.

So revert __follow_page to check pte_write as before, and make
do_wp_page pass back a special extra VM_FAULT_WRITE bit to say it has
done its full job: once get_user_pages receives this value, it no
longer requires pte_write in __follow_page.

But most callers of handle_mm_fault, in the various architectures, have
switch statements which do not expect this new case.  To avoid changing
them all in a hurry, make an inline wrapper function (using the old
name) that masks off the new bit, and use the extended interface with
double underscores.

Yes, we do have a call to do_wp_page from do_swap_page, but no need to
change that: in the rare case it's needed, another do_wp_page will follow.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
[ Cleanups by Nick Piggin ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
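As an editorial aside (not part of the patch): a minimal, self-contained
C sketch of the masking trick described above. The helper fake_fault is
hypothetical; only the VM_FAULT_* values come from the patch. Because
VM_FAULT_WRITE occupies a distinct bit, the inline wrapper can strip it
with a single mask and legacy switch statements never see an unknown case:

#include <stdio.h>

/* Flag values as defined by this patch in include/linux/mm.h. */
#define VM_FAULT_OOM	0x00
#define VM_FAULT_SIGBUS	0x01
#define VM_FAULT_MINOR	0x02
#define VM_FAULT_MAJOR	0x03
#define VM_FAULT_WRITE	0x10	/* distinct bit, so it ORs cleanly */

/* Hypothetical stand-in for __handle_mm_fault() reporting a COW break. */
static int fake_fault(void)
{
	return VM_FAULT_MINOR | VM_FAULT_WRITE;
}

int main(void)
{
	/* Legacy callers go through the masking wrapper... */
	int masked = fake_fault() & ~VM_FAULT_WRITE;

	/* ...so their switch statements still see a known case. */
	switch (masked) {
	case VM_FAULT_MINOR:
		printf("legacy caller sees VM_FAULT_MINOR (0x%02x)\n", masked);
		break;
	default:
		printf("unexpected: 0x%02x\n", masked);
		break;
	}
	return 0;
}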
-rw-r--r--	include/linux/mm.h	22
-rw-r--r--	mm/memory.c	31
2 files changed, 40 insertions(+), 13 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6eb7f48317f8..82d7024f0765 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -625,10 +625,16 @@ static inline int page_mapped(struct page *page)
  * Used to decide whether a process gets delivered SIGBUS or
  * just gets major/minor fault counters bumped up.
  */
-#define VM_FAULT_OOM	(-1)
-#define VM_FAULT_SIGBUS	0
-#define VM_FAULT_MINOR	1
-#define VM_FAULT_MAJOR	2
+#define VM_FAULT_OOM	0x00
+#define VM_FAULT_SIGBUS	0x01
+#define VM_FAULT_MINOR	0x02
+#define VM_FAULT_MAJOR	0x03
+
+/*
+ * Special case for get_user_pages.
+ * Must be in a distinct bit from the above VM_FAULT_ flags.
+ */
+#define VM_FAULT_WRITE	0x10
 
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
@@ -704,7 +710,13 @@ extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsign
 extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
 extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
-extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
+extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
+
+static inline int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
+{
+	return __handle_mm_fault(mm, vma, address, write_access) & (~VM_FAULT_WRITE);
+}
+
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
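Also as an editorial aside, before the mm/memory.c hunks below: a toy
userspace sketch of the retry loop those hunks repair. Everything here
(toy_follow_page, toy_handle_fault, pte_write_flag) is a hypothetical
stand-in, not kernel API. It shows why get_user_pages must demote
write_access once VM_FAULT_WRITE comes back: COW has been broken even
though the pte was never made writable, so continuing to insist on
write permission would retry forever.

#include <stdbool.h>
#include <stdio.h>

#define VM_FAULT_MINOR	0x02
#define VM_FAULT_WRITE	0x10

/* Toy pte state: COW has been broken, but the pte stayed read-only. */
static bool pte_write_flag = false;

/* Stand-in for follow_page(): fails if we demand write on a r/o pte. */
static bool toy_follow_page(int write_access)
{
	return !write_access || pte_write_flag;
}

/* Stand-in for __handle_mm_fault(): breaks COW, reports it via the bit. */
static int toy_handle_fault(void)
{
	return VM_FAULT_MINOR | VM_FAULT_WRITE;
}

int main(void)
{
	int write_access = 1;	/* caller asked for a writable mapping */

	while (!toy_follow_page(write_access)) {
		int ret = toy_handle_fault();

		if (ret & VM_FAULT_WRITE) {
			/*
			 * COW is broken; stop insisting on pte_write,
			 * otherwise this loop would never terminate.
			 */
			write_access = 0;
		}
	}
	printf("page obtained without spinning forever\n");
	return 0;
}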
diff --git a/mm/memory.c b/mm/memory.c
index 2405289dfdf8..81d7117aa58b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -811,15 +811,18 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
 	pte = *ptep;
 	pte_unmap(ptep);
 	if (pte_present(pte)) {
-		if (write && !pte_dirty(pte))
+		if (write && !pte_write(pte))
 			goto out;
 		if (read && !pte_read(pte))
 			goto out;
 		pfn = pte_pfn(pte);
 		if (pfn_valid(pfn)) {
 			page = pfn_to_page(pfn);
-			if (accessed)
+			if (accessed) {
+				if (write && !pte_dirty(pte) && !PageDirty(page))
+					set_page_dirty(page);
 				mark_page_accessed(page);
+			}
 			return page;
 		}
 	}
@@ -941,10 +944,11 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		}
 		spin_lock(&mm->page_table_lock);
 		do {
+			int write_access = write;
 			struct page *page;
 
 			cond_resched_lock(&mm->page_table_lock);
-			while (!(page = follow_page(mm, start, write))) {
+			while (!(page = follow_page(mm, start, write_access))) {
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of pages tables for
@@ -957,7 +961,16 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 					break;
 				}
 				spin_unlock(&mm->page_table_lock);
-				switch (handle_mm_fault(mm,vma,start,write)) {
+				switch (__handle_mm_fault(mm, vma, start,
+						write_access)) {
+				case VM_FAULT_WRITE:
+					/*
+					 * do_wp_page has broken COW when
+					 * necessary, even if maybe_mkwrite
+					 * decided not to set pte_write
+					 */
+					write_access = 0;
+					/* FALLTHRU */
 				case VM_FAULT_MINOR:
 					tsk->min_flt++;
 					break;
@@ -1220,6 +1233,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	struct page *old_page, *new_page;
 	unsigned long pfn = pte_pfn(pte);
 	pte_t entry;
+	int ret;
 
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
@@ -1247,7 +1261,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 			lazy_mmu_prot_update(entry);
 			pte_unmap(page_table);
 			spin_unlock(&mm->page_table_lock);
-			return VM_FAULT_MINOR;
+			return VM_FAULT_MINOR|VM_FAULT_WRITE;
 		}
 	}
 	pte_unmap(page_table);
@@ -1274,6 +1288,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
+	ret = VM_FAULT_MINOR;
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, pte))) {
@@ -1290,12 +1305,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 
 		/* Free the old page.. */
 		new_page = old_page;
+		ret |= VM_FAULT_WRITE;
 	}
 	pte_unmap(page_table);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 	spin_unlock(&mm->page_table_lock);
-	return VM_FAULT_MINOR;
+	return ret;
 
 no_new_page:
 	page_cache_release(old_page);
@@ -1987,7 +2003,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	if (write_access) {
 		if (!pte_write(entry))
 			return do_wp_page(mm, vma, address, pte, pmd, entry);
-
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
@@ -2002,7 +2017,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 		unsigned long address, int write_access)
 {
 	pgd_t *pgd;