Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	73
1 file changed, 44 insertions(+), 29 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index beabdefa6254..e046b7e4b530 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -776,8 +776,8 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
  * Do a quick page-table lookup for a single page.
  * mm->page_table_lock must be held.
  */
-static struct page *
-__follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
+static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
+			int read, int write, int accessed)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -818,9 +818,11 @@ __follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
 	pfn = pte_pfn(pte);
 	if (pfn_valid(pfn)) {
 		page = pfn_to_page(pfn);
-		if (write && !pte_dirty(pte) && !PageDirty(page))
-			set_page_dirty(page);
-		mark_page_accessed(page);
+		if (accessed) {
+			if (write && !pte_dirty(pte) && !PageDirty(page))
+				set_page_dirty(page);
+			mark_page_accessed(page);
+		}
 		return page;
 	}
 }
@@ -829,16 +831,19 @@ out:
 	return NULL;
 }
 
-struct page *
+inline struct page *
 follow_page(struct mm_struct *mm, unsigned long address, int write)
 {
-	return __follow_page(mm, address, /*read*/0, write);
+	return __follow_page(mm, address, 0, write, 1);
 }
 
-int
-check_user_page_readable(struct mm_struct *mm, unsigned long address)
+/*
+ * check_user_page_readable() can be called from interrupt context by oprofile,
+ * so we need to avoid taking any non-irq-safe locks
+ */
+int check_user_page_readable(struct mm_struct *mm, unsigned long address)
 {
-	return __follow_page(mm, address, /*read*/1, /*write*/0) != NULL;
+	return __follow_page(mm, address, 1, 0, 0) != NULL;
 }
 EXPORT_SYMBOL(check_user_page_readable);
 
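The new accessed argument is what lets the two entry points share __follow_page(): the normal follow_page() path keeps the dirty/LRU bookkeeping, while the oprofile probe must not do it, since it can run in interrupt context. A hedged summary of the resulting contracts (illustrative comments, not part of the patch):

/*
 * follow_page(mm, addr, write)
 *	- process context, caller holds mm->page_table_lock
 *	- accessed=1: may call set_page_dirty()/mark_page_accessed()
 *
 * check_user_page_readable(mm, addr)
 *	- may be called from interrupt context (oprofile backtraces)
 *	- accessed=0: pure lookup, avoids any non-irq-safe locks
 */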
@@ -908,9 +913,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		pud = pud_offset(pgd, pg);
 		BUG_ON(pud_none(*pud));
 		pmd = pmd_offset(pud, pg);
-		BUG_ON(pmd_none(*pmd));
+		if (pmd_none(*pmd))
+			return i ? : -EFAULT;
 		pte = pte_offset_map(pmd, pg);
-		BUG_ON(pte_none(*pte));
+		if (pte_none(*pte)) {
+			pte_unmap(pte);
+			return i ? : -EFAULT;
+		}
 		if (pages) {
 			pages[i] = pte_page(*pte);
 			get_page(pages[i]);
@@ -935,11 +944,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		}
 		spin_lock(&mm->page_table_lock);
 		do {
+			int write_access = write;
 			struct page *page;
-			int lookup_write = write;
 
 			cond_resched_lock(&mm->page_table_lock);
-			while (!(page = follow_page(mm, start, lookup_write))) {
+			while (!(page = follow_page(mm, start, write_access))) {
+				int ret;
+
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of pages tables for
@@ -947,13 +958,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				 * nobody touched so far. This is important
 				 * for doing a core dump for these mappings.
 				 */
-				if (!lookup_write &&
-				    untouched_anonymous_page(mm,vma,start)) {
+				if (!write && untouched_anonymous_page(mm,vma,start)) {
 					page = ZERO_PAGE(start);
 					break;
 				}
 				spin_unlock(&mm->page_table_lock);
-				switch (handle_mm_fault(mm,vma,start,write)) {
+				ret = __handle_mm_fault(mm, vma, start, write_access);
+
+				/*
+				 * The VM_FAULT_WRITE bit tells us that do_wp_page has
+				 * broken COW when necessary, even if maybe_mkwrite
+				 * decided not to set pte_write. We can thus safely do
+				 * subsequent page lookups as if they were reads.
+				 */
+				if (ret & VM_FAULT_WRITE)
+					write_access = 0;
+
+				switch (ret & ~VM_FAULT_WRITE) {
 				case VM_FAULT_MINOR:
 					tsk->min_flt++;
 					break;
@@ -967,14 +988,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				default:
 					BUG();
 				}
-				/*
-				 * Now that we have performed a write fault
-				 * and surely no longer have a shared page we
-				 * shouldn't write, we shouldn't ignore an
-				 * unwritable page in the page table if
-				 * we are forcing write access.
-				 */
-				lookup_write = write && !force;
 				spin_lock(&mm->page_table_lock);
 			}
 			if (pages) {
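The switch in get_user_pages() masks the fault code with ~VM_FAULT_WRITE because the new bit is OR'ed on top of the ordinary VM_FAULT_* values by do_wp_page() (see the hunks below). Those constants live in include/linux/mm.h, which this diffstat does not cover; as a hedged reference, the 2.6-era definitions are along these lines (reproduced from memory, treat as illustrative):

#define VM_FAULT_OOM	0x00
#define VM_FAULT_SIGBUS	0x01
#define VM_FAULT_MINOR	0x02
#define VM_FAULT_MAJOR	0x03

#define VM_FAULT_WRITE	0x10	/* special case for get_user_pages() */

With write_access cleared as soon as VM_FAULT_WRITE comes back, the retry loop no longer depends on the pte becoming writable: even when maybe_mkwrite() leaves it read-only (for example a debugger forcing a write into a read-only private mapping), the next follow_page() call is effectively a read lookup of the now-private page, so the loop terminates with the correct page.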
@@ -1224,6 +1237,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	struct page *old_page, *new_page;
 	unsigned long pfn = pte_pfn(pte);
 	pte_t entry;
+	int ret;
 
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
@@ -1251,7 +1265,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 			lazy_mmu_prot_update(entry);
 			pte_unmap(page_table);
 			spin_unlock(&mm->page_table_lock);
-			return VM_FAULT_MINOR;
+			return VM_FAULT_MINOR|VM_FAULT_WRITE;
 		}
 	}
 	pte_unmap(page_table);
@@ -1278,6 +1292,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
+	ret = VM_FAULT_MINOR;
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, pte))) {
@@ -1294,12 +1309,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 
 		/* Free the old page.. */
 		new_page = old_page;
+		ret |= VM_FAULT_WRITE;
 	}
 	pte_unmap(page_table);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 	spin_unlock(&mm->page_table_lock);
-	return VM_FAULT_MINOR;
+	return ret;
 
 no_new_page:
 	page_cache_release(old_page);
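do_wp_page() is where VM_FAULT_WRITE originates: the early "reuse" exit returns VM_FAULT_MINOR|VM_FAULT_WRITE directly, and the copy path adds the bit to ret only when the pte is still unchanged after re-taking the lock. Condensed from the hunks above (a sketch of the control flow, not the literal kernel code; "reuse" stands in for the existing check that the anonymous page is not shared):

	/* reuse path: page is already exclusive, just make the pte writable */
	if (reuse)
		return VM_FAULT_MINOR | VM_FAULT_WRITE;

	ret = VM_FAULT_MINOR;
	spin_lock(&mm->page_table_lock);
	if (pte_same(*page_table, pte)) {
		/* ... install new_page as the private copy ... */
		ret |= VM_FAULT_WRITE;	/* COW has actually been broken */
	}
	spin_unlock(&mm->page_table_lock);
	return ret;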
@@ -1991,7 +2007,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	if (write_access) {
 		if (!pte_write(entry))
 			return do_wp_page(mm, vma, address, pte, pmd, entry);
-
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
@@ -2006,7 +2021,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 		unsigned long address, int write_access)
 {
 	pgd_t *pgd;
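Because the diffstat is limited to mm/memory.c, the rename of handle_mm_fault() to __handle_mm_fault() only works together with a wrapper elsewhere in the same commit (presumably in include/linux/mm.h) that hides VM_FAULT_WRITE from every caller other than get_user_pages(). A hedged sketch of what that wrapper would look like:

static inline int handle_mm_fault(struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long address,
		int write_access)
{
	/* Ordinary fault paths keep seeing only the classic VM_FAULT_* codes;
	 * only get_user_pages() calls __handle_mm_fault() directly and
	 * inspects the VM_FAULT_WRITE bit. */
	return __handle_mm_fault(mm, vma, address, write_access) &
					(~VM_FAULT_WRITE);
}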