path: root/mm/memory.c
Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	73
1 file changed, 44 insertions(+), 29 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index beabdefa6254..e046b7e4b530 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -776,8 +776,8 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
  * Do a quick page-table lookup for a single page.
  * mm->page_table_lock must be held.
  */
-static struct page *
-__follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
+static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
+			int read, int write, int accessed)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -818,9 +818,11 @@ __follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
 		pfn = pte_pfn(pte);
 		if (pfn_valid(pfn)) {
 			page = pfn_to_page(pfn);
-			if (write && !pte_dirty(pte) && !PageDirty(page))
-				set_page_dirty(page);
-			mark_page_accessed(page);
+			if (accessed) {
+				if (write && !pte_dirty(pte) && !PageDirty(page))
+					set_page_dirty(page);
+				mark_page_accessed(page);
+			}
 			return page;
 		}
 	}
@@ -829,16 +831,19 @@ out:
 	return NULL;
 }
 
-struct page *
+inline struct page *
 follow_page(struct mm_struct *mm, unsigned long address, int write)
 {
-	return __follow_page(mm, address, /*read*/0, write);
+	return __follow_page(mm, address, 0, write, 1);
 }
 
-int
-check_user_page_readable(struct mm_struct *mm, unsigned long address)
+/*
+ * check_user_page_readable() can be called from interrupt context by oprofile,
+ * so we need to avoid taking any non-irq-safe locks
+ */
+int check_user_page_readable(struct mm_struct *mm, unsigned long address)
 {
-	return __follow_page(mm, address, /*read*/1, /*write*/0) != NULL;
+	return __follow_page(mm, address, 1, 0, 0) != NULL;
 }
 EXPORT_SYMBOL(check_user_page_readable);
 
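
With the /*read*/ and /*write*/ annotations dropped from the call sites above, the meaning of the three bare integer arguments is easy to misread. For reference, the two callers now amount to the following sketch (the argument comments and the local names page and present are added here for illustration and are not part of the patch):

	/* Ordinary lookup: keep the accessed/dirty bookkeeping. */
	page = __follow_page(mm, address, 0 /* read */, write, 1 /* accessed */);

	/*
	 * Interrupt-safe probe used by oprofile: pass accessed == 0 so that
	 * set_page_dirty() and mark_page_accessed(), which may take
	 * non-irq-safe locks, are skipped entirely.
	 */
	present = __follow_page(mm, address, 1 /* read */, 0 /* write */, 0 /* accessed */) != NULL;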
@@ -908,9 +913,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			pud = pud_offset(pgd, pg);
 			BUG_ON(pud_none(*pud));
 			pmd = pmd_offset(pud, pg);
-			BUG_ON(pmd_none(*pmd));
+			if (pmd_none(*pmd))
+				return i ? : -EFAULT;
 			pte = pte_offset_map(pmd, pg);
-			BUG_ON(pte_none(*pte));
+			if (pte_none(*pte)) {
+				pte_unmap(pte);
+				return i ? : -EFAULT;
+			}
 			if (pages) {
 				pages[i] = pte_page(*pte);
 				get_page(pages[i]);
@@ -935,11 +944,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		}
 		spin_lock(&mm->page_table_lock);
 		do {
+			int write_access = write;
 			struct page *page;
-			int lookup_write = write;
 
 			cond_resched_lock(&mm->page_table_lock);
-			while (!(page = follow_page(mm, start, lookup_write))) {
+			while (!(page = follow_page(mm, start, write_access))) {
+				int ret;
+
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of pages tables for
@@ -947,13 +958,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				 * nobody touched so far. This is important
 				 * for doing a core dump for these mappings.
 				 */
-				if (!lookup_write &&
-				    untouched_anonymous_page(mm,vma,start)) {
+				if (!write && untouched_anonymous_page(mm,vma,start)) {
 					page = ZERO_PAGE(start);
 					break;
 				}
 				spin_unlock(&mm->page_table_lock);
-				switch (handle_mm_fault(mm,vma,start,write)) {
+				ret = __handle_mm_fault(mm, vma, start, write_access);
+
+				/*
+				 * The VM_FAULT_WRITE bit tells us that do_wp_page has
+				 * broken COW when necessary, even if maybe_mkwrite
+				 * decided not to set pte_write. We can thus safely do
+				 * subsequent page lookups as if they were reads.
+				 */
+				if (ret & VM_FAULT_WRITE)
+					write_access = 0;
+
+				switch (ret & ~VM_FAULT_WRITE) {
 				case VM_FAULT_MINOR:
 					tsk->min_flt++;
 					break;
@@ -967,14 +988,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				default:
 					BUG();
 				}
-				/*
-				 * Now that we have performed a write fault
-				 * and surely no longer have a shared page we
-				 * shouldn't write, we shouldn't ignore an
-				 * unwritable page in the page table if
-				 * we are forcing write access.
-				 */
-				lookup_write = write && !force;
 				spin_lock(&mm->page_table_lock);
 			}
 			if (pages) {
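
__handle_mm_fault() now returns the usual VM_FAULT_* code with an extra VM_FAULT_WRITE flag possibly OR'd in; get_user_pages() strips the flag before the switch and clears write_access once COW has been broken. A minimal, stand-alone sketch of that masking pattern follows; the constants and fake_fault() are illustrative stand-ins, not the kernel's definitions:

#include <stdio.h>

#define FAULT_MINOR	0x01		/* stand-in for VM_FAULT_MINOR */
#define FAULT_MAJOR	0x02		/* stand-in for VM_FAULT_MAJOR */
#define FAULT_WRITE	0x10		/* stand-in for VM_FAULT_WRITE */

/* Pretend a write fault resolved as a minor fault and broke COW. */
static int fake_fault(int write_access)
{
	return write_access ? (FAULT_MINOR | FAULT_WRITE) : FAULT_MINOR;
}

int main(void)
{
	int write_access = 1;
	int ret = fake_fault(write_access);

	/* COW has been broken: later lookups may be treated as reads. */
	if (ret & FAULT_WRITE)
		write_access = 0;

	/* Mask the flag off before dispatching on the base fault code. */
	switch (ret & ~FAULT_WRITE) {
	case FAULT_MINOR:
		printf("minor fault, write_access is now %d\n", write_access);
		break;
	case FAULT_MAJOR:
		printf("major fault\n");
		break;
	}
	return 0;
}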
@@ -1224,6 +1237,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	struct page *old_page, *new_page;
 	unsigned long pfn = pte_pfn(pte);
 	pte_t entry;
+	int ret;
 
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
@@ -1251,7 +1265,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 			lazy_mmu_prot_update(entry);
 			pte_unmap(page_table);
 			spin_unlock(&mm->page_table_lock);
-			return VM_FAULT_MINOR;
+			return VM_FAULT_MINOR|VM_FAULT_WRITE;
 		}
 	}
 	pte_unmap(page_table);
@@ -1278,6 +1292,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
+	ret = VM_FAULT_MINOR;
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, pte))) {
@@ -1294,12 +1309,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 
 		/* Free the old page.. */
 		new_page = old_page;
+		ret |= VM_FAULT_WRITE;
 	}
 	pte_unmap(page_table);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 	spin_unlock(&mm->page_table_lock);
-	return VM_FAULT_MINOR;
+	return ret;
 
 no_new_page:
 	page_cache_release(old_page);
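
On the producer side, do_wp_page() now starts from VM_FAULT_MINOR and ORs in VM_FAULT_WRITE on the paths that actually made the pte writable: the in-place reuse path in the earlier hunk and the break_cow() path here. Reduced to its return-value logic only, the shape is roughly the following sketch (the parameter names are made up for illustration; this is not the full function):

static int wp_return_sketch(int reused_in_place, int installed_copy)
{
	int ret = VM_FAULT_MINOR;

	if (reused_in_place)		/* exclusive anon page: write in place */
		return VM_FAULT_MINOR | VM_FAULT_WRITE;

	if (installed_copy)		/* break_cow() made the new pte writable */
		ret |= VM_FAULT_WRITE;

	return ret;
}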
@@ -1991,7 +2007,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	if (write_access) {
 		if (!pte_write(entry))
 			return do_wp_page(mm, vma, address, pte, pmd, entry);
-
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
@@ -2006,7 +2021,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 		unsigned long address, int write_access)
 {
 	pgd_t *pgd;
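
The final hunk renames handle_mm_fault() to __handle_mm_fault(). Since this diff is limited to mm/memory.c, the companion change that keeps existing callers working is not shown; presumably a header wrapper masks VM_FAULT_WRITE off so only get_user_pages() ever sees the flag. A sketch of what such a wrapper would look like (an assumption about the header side of the patch, not part of this file's diff):

/*
 * Hypothetical wrapper, assumed to live in a shared header: ordinary fault
 * paths keep calling handle_mm_fault() and never see the internal
 * VM_FAULT_WRITE flag.
 */
static inline int handle_mm_fault(struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long address,
		int write_access)
{
	return __handle_mm_fault(mm, vma, address, write_access) &
						(~VM_FAULT_WRITE);
}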