Diffstat (limited to 'mm/memory.c')
 -rw-r--r--  mm/memory.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 44 insertions(+), 29 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index beabdefa6254..e046b7e4b530 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -776,8 +776,8 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
  * Do a quick page-table lookup for a single page.
  * mm->page_table_lock must be held.
  */
-static struct page *
-__follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
+static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
+		int read, int write, int accessed)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -818,9 +818,11 @@ __follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
 		pfn = pte_pfn(pte);
 		if (pfn_valid(pfn)) {
 			page = pfn_to_page(pfn);
-			if (write && !pte_dirty(pte) && !PageDirty(page))
-				set_page_dirty(page);
-			mark_page_accessed(page);
+			if (accessed) {
+				if (write && !pte_dirty(pte) && !PageDirty(page))
+					set_page_dirty(page);
+				mark_page_accessed(page);
+			}
 			return page;
 		}
 	}
@@ -829,16 +831,19 @@ out:
 	return NULL;
 }
 
-struct page *
+inline struct page *
 follow_page(struct mm_struct *mm, unsigned long address, int write)
 {
-	return __follow_page(mm, address, /*read*/0, write);
+	return __follow_page(mm, address, 0, write, 1);
 }
 
-int
-check_user_page_readable(struct mm_struct *mm, unsigned long address)
+/*
+ * check_user_page_readable() can be called from interrupt context by oprofile,
+ * so we need to avoid taking any non-irq-safe locks
+ */
+int check_user_page_readable(struct mm_struct *mm, unsigned long address)
 {
-	return __follow_page(mm, address, /*read*/1, /*write*/0) != NULL;
+	return __follow_page(mm, address, 1, 0, 0) != NULL;
 }
 EXPORT_SYMBOL(check_user_page_readable);
 
@@ -908,9 +913,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			pud = pud_offset(pgd, pg);
 			BUG_ON(pud_none(*pud));
 			pmd = pmd_offset(pud, pg);
-			BUG_ON(pmd_none(*pmd));
+			if (pmd_none(*pmd))
+				return i ? : -EFAULT;
 			pte = pte_offset_map(pmd, pg);
-			BUG_ON(pte_none(*pte));
+			if (pte_none(*pte)) {
+				pte_unmap(pte);
+				return i ? : -EFAULT;
+			}
 			if (pages) {
 				pages[i] = pte_page(*pte);
 				get_page(pages[i]);
@@ -935,11 +944,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		}
 		spin_lock(&mm->page_table_lock);
 		do {
+			int write_access = write;
 			struct page *page;
-			int lookup_write = write;
 
 			cond_resched_lock(&mm->page_table_lock);
-			while (!(page = follow_page(mm, start, lookup_write))) {
+			while (!(page = follow_page(mm, start, write_access))) {
+				int ret;
+
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of page tables for
@@ -947,13 +958,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				 * nobody touched so far. This is important
 				 * for doing a core dump for these mappings.
 				 */
-				if (!lookup_write &&
-				    untouched_anonymous_page(mm,vma,start)) {
+				if (!write && untouched_anonymous_page(mm,vma,start)) {
 					page = ZERO_PAGE(start);
 					break;
 				}
 				spin_unlock(&mm->page_table_lock);
-				switch (handle_mm_fault(mm,vma,start,write)) {
+				ret = __handle_mm_fault(mm, vma, start, write_access);
+
+				/*
+				 * The VM_FAULT_WRITE bit tells us that do_wp_page has
+				 * broken COW when necessary, even if maybe_mkwrite
+				 * decided not to set pte_write. We can thus safely do
+				 * subsequent page lookups as if they were reads.
+				 */
+				if (ret & VM_FAULT_WRITE)
+					write_access = 0;
+
+				switch (ret & ~VM_FAULT_WRITE) {
 				case VM_FAULT_MINOR:
 					tsk->min_flt++;
 					break;
@@ -967,14 +988,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				default:
 					BUG();
 				}
-				/*
-				 * Now that we have performed a write fault
-				 * and surely no longer have a shared page we
-				 * shouldn't write, we shouldn't ignore an
-				 * unwritable page in the page table if
-				 * we are forcing write access.
-				 */
-				lookup_write = write && !force;
 				spin_lock(&mm->page_table_lock);
 			}
 			if (pages) {
@@ -1224,6 +1237,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	struct page *old_page, *new_page;
 	unsigned long pfn = pte_pfn(pte);
 	pte_t entry;
+	int ret;
 
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
@@ -1251,7 +1265,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 			lazy_mmu_prot_update(entry);
 			pte_unmap(page_table);
 			spin_unlock(&mm->page_table_lock);
-			return VM_FAULT_MINOR;
+			return VM_FAULT_MINOR|VM_FAULT_WRITE;
 		}
 	}
 	pte_unmap(page_table);
@@ -1278,6 +1292,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
+	ret = VM_FAULT_MINOR;
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, pte))) {
@@ -1294,12 +1309,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 
 		/* Free the old page.. */
 		new_page = old_page;
+		ret |= VM_FAULT_WRITE;
 	}
 	pte_unmap(page_table);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 	spin_unlock(&mm->page_table_lock);
-	return VM_FAULT_MINOR;
+	return ret;
 
 no_new_page:
 	page_cache_release(old_page);
@@ -1991,7 +2007,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	if (write_access) {
 		if (!pte_write(entry))
 			return do_wp_page(mm, vma, address, pte, pmd, entry);
-
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
@@ -2006,7 +2021,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 		unsigned long address, int write_access)
 {
 	pgd_t *pgd;
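
The retry logic in the get_user_pages() hunks above can be seen in miniature below. This is a hypothetical userspace model, not kernel code: the numeric VM_FAULT_* values are illustrative and fake_fault() merely stands in for __handle_mm_fault(). It shows why folding the WRITE bit into the return value works: the bit is masked off before the switch, so the existing MINOR/MAJOR accounting is undisturbed, while the caller uses it to downgrade subsequent lookups to reads and therefore cannot loop forever on a COW page whose pte never becomes writable.

#include <stdio.h>

#define VM_FAULT_MINOR	1	/* illustrative values, not the kernel's */
#define VM_FAULT_MAJOR	2
#define VM_FAULT_WRITE	0x10	/* set when COW has been broken */

/* Stand-in for __handle_mm_fault(): a write fault that broke COW
 * reports MINOR plus the WRITE bit in a single return value. */
static int fake_fault(int write_access)
{
	return write_access ? (VM_FAULT_MINOR | VM_FAULT_WRITE) : VM_FAULT_MINOR;
}

int main(void)
{
	int write_access = 1;	/* caller originally wanted a writable page */
	int ret = fake_fault(write_access);

	if (ret & VM_FAULT_WRITE)	/* COW done: later lookups can be reads */
		write_access = 0;

	switch (ret & ~VM_FAULT_WRITE) {	/* accounting sees plain codes */
	case VM_FAULT_MINOR:
		printf("minor fault, write_access now %d\n", write_access);
		break;
	case VM_FAULT_MAJOR:
		printf("major fault\n");
		break;
	}
	return 0;
}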
