Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	| 204
1 file changed, 145 insertions(+), 59 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 7b9db658aca2..e009ce870859 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -52,6 +52,9 @@
 #include <linux/writeback.h>
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
+#include <linux/kallsyms.h>
+#include <linux/swapops.h>
+#include <linux/elf.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -59,9 +62,6 @@
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
 
-#include <linux/swapops.h>
-#include <linux/elf.h>
-
 #include "internal.h"
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -375,15 +375,65 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
  *
  * The calling function must still handle the error.
  */
-static void print_bad_pte(struct vm_area_struct *vma, pte_t pte,
-			  unsigned long vaddr)
+static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
+			  pte_t pte, struct page *page)
 {
-	printk(KERN_ERR "Bad pte = %08llx, process = %s, "
-			"vm_flags = %lx, vaddr = %lx\n",
-		(long long)pte_val(pte),
-		(vma->vm_mm == current->mm ? current->comm : "???"),
-		vma->vm_flags, vaddr);
+	pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
+	pud_t *pud = pud_offset(pgd, addr);
+	pmd_t *pmd = pmd_offset(pud, addr);
+	struct address_space *mapping;
+	pgoff_t index;
+	static unsigned long resume;
+	static unsigned long nr_shown;
+	static unsigned long nr_unshown;
+
+	/*
+	 * Allow a burst of 60 reports, then keep quiet for that minute;
+	 * or allow a steady drip of one report per second.
+	 */
+	if (nr_shown == 60) {
+		if (time_before(jiffies, resume)) {
+			nr_unshown++;
+			return;
+		}
+		if (nr_unshown) {
+			printk(KERN_ALERT
+				"BUG: Bad page map: %lu messages suppressed\n",
+				nr_unshown);
+			nr_unshown = 0;
+		}
+		nr_shown = 0;
+	}
+	if (nr_shown++ == 0)
+		resume = jiffies + 60 * HZ;
+
+	mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL;
+	index = linear_page_index(vma, addr);
+
+	printk(KERN_ALERT
+		"BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n",
+		current->comm,
+		(long long)pte_val(pte), (long long)pmd_val(*pmd));
+	if (page) {
+		printk(KERN_ALERT
+		"page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
+		page, (void *)page->flags, page_count(page),
+		page_mapcount(page), page->mapping, page->index);
+	}
+	printk(KERN_ALERT
+		"addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
+		(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
+	/*
+	 * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y
+	 */
+	if (vma->vm_ops)
+		print_symbol(KERN_ALERT "vma->vm_ops->fault: %s\n",
+				(unsigned long)vma->vm_ops->fault);
+	if (vma->vm_file && vma->vm_file->f_op)
+		print_symbol(KERN_ALERT "vma->vm_file->f_op->mmap: %s\n",
+				(unsigned long)vma->vm_file->f_op->mmap);
 	dump_stack();
+	add_taint(TAINT_BAD_PAGE);
 }
 
 static inline int is_cow_mapping(unsigned int flags)
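The reporting throttle added to print_bad_pte() above allows a burst of up to 60 reports, then stays quiet until a minute after the burst began, counting how many messages it dropped. Below is a minimal standalone sketch of the same pattern, with a hypothetical bad_map_report() and userspace time() standing in for jiffies; it illustrates the throttle only and is not the kernel code itself.

	#include <stdio.h>
	#include <time.h>

	/* Burst-then-suppress rate limiting, mirroring the new print_bad_pte(). */
	static void bad_map_report(const char *msg)
	{
		static time_t resume;
		static unsigned long nr_shown, nr_unshown;
		time_t now = time(NULL);

		/* Allow a burst of 60 reports, then keep quiet for that minute. */
		if (nr_shown == 60) {
			if (now < resume) {
				nr_unshown++;		/* dropped silently */
				return;
			}
			if (nr_unshown) {
				printf("%lu messages suppressed\n", nr_unshown);
				nr_unshown = 0;
			}
			nr_shown = 0;			/* start a new burst */
		}
		if (nr_shown++ == 0)
			resume = now + 60;		/* window ends a minute after the first report */

		printf("%s\n", msg);
	}

	int main(void)
	{
		for (int i = 0; i < 100; i++)		/* prints 60 reports, suppresses 40 */
			bad_map_report("bad page map");
		return 0;
	}
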
@@ -441,21 +491,18 @@ static inline int is_cow_mapping(unsigned int flags)
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 				pte_t pte)
 {
-	unsigned long pfn;
+	unsigned long pfn = pte_pfn(pte);
 
 	if (HAVE_PTE_SPECIAL) {
-		if (likely(!pte_special(pte))) {
-			VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-			return pte_page(pte);
-		}
-		VM_BUG_ON(!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
+		if (likely(!pte_special(pte)))
+			goto check_pfn;
+		if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)))
+			print_bad_pte(vma, addr, pte, NULL);
 		return NULL;
 	}
 
 	/* !HAVE_PTE_SPECIAL case follows: */
 
-	pfn = pte_pfn(pte);
-
 	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
 		if (vma->vm_flags & VM_MIXEDMAP) {
 			if (!pfn_valid(pfn))
@@ -471,11 +518,14 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		}
 	}
 
-	VM_BUG_ON(!pfn_valid(pfn));
+check_pfn:
+	if (unlikely(pfn > highest_memmap_pfn)) {
+		print_bad_pte(vma, addr, pte, NULL);
+		return NULL;
+	}
 
 	/*
 	 * NOTE! We still have PageReserved() pages in the page tables.
-	 *
 	 * eg. VDSO mappings can cause them to exist.
 	 */
 out:
@@ -767,11 +817,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			else {
 				if (pte_dirty(ptent))
 					set_page_dirty(page);
-				if (pte_young(ptent))
-					SetPageReferenced(page);
+				if (pte_young(ptent) &&
+				    likely(!VM_SequentialReadHint(vma)))
+					mark_page_accessed(page);
 				file_rss--;
 			}
-			page_remove_rmap(page, vma);
+			page_remove_rmap(page);
+			if (unlikely(page_mapcount(page) < 0))
+				print_bad_pte(vma, addr, ptent, page);
 			tlb_remove_page(tlb, page);
 			continue;
 		}
@@ -781,8 +834,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		 */
 		if (unlikely(details))
 			continue;
-		if (!pte_file(ptent))
-			free_swap_and_cache(pte_to_swp_entry(ptent));
+		if (pte_file(ptent)) {
+			if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
+				print_bad_pte(vma, addr, ptent, NULL);
+		} else if
+		  (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent))))
+			print_bad_pte(vma, addr, ptent, NULL);
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
@@ -1153,6 +1210,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	int write = !!(flags & GUP_FLAGS_WRITE);
 	int force = !!(flags & GUP_FLAGS_FORCE);
 	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
+	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
 
 	if (len <= 0)
 		return 0;
@@ -1231,12 +1289,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		struct page *page;
 
 		/*
-		 * If tsk is ooming, cut off its access to large memory
-		 * allocations. It has a pending SIGKILL, but it can't
-		 * be processed until returning to user space.
+		 * If we have a pending SIGKILL, don't keep faulting
+		 * pages and potentially allocating memory, unless
+		 * current is handling munlock--e.g., on exit. In
+		 * that case, we are not allocating memory. Rather,
+		 * we're only unlocking already resident/mapped pages.
 		 */
-		if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE)))
-			return i ? i : -ENOMEM;
+		if (unlikely(!ignore_sigkill &&
+				fatal_signal_pending(current)))
+			return i ? i : -ERESTARTSYS;
 
 		if (write)
 			foll_flags |= FOLL_WRITE;
@@ -1263,9 +1324,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				 * do_wp_page has broken COW when necessary,
 				 * even if maybe_mkwrite decided not to set
 				 * pte_write. We can thus safely do subsequent
-				 * page lookups as if they were reads.
+				 * page lookups as if they were reads. But only
+				 * do so when looping for pte_write is futile:
+				 * in some cases userspace may also be wanting
+				 * to write to the gotten user page, which a
+				 * read fault here might prevent (a readonly
+				 * page might get reCOWed by userspace write).
 				 */
-				if (ret & VM_FAULT_WRITE)
+				if ((ret & VM_FAULT_WRITE) &&
+				    !(vma->vm_flags & VM_WRITE))
 					foll_flags &= ~FOLL_WRITE;
 
 				cond_resched();
@@ -1644,6 +1711,8 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 
 	BUG_ON(pmd_huge(*pmd));
 
+	arch_enter_lazy_mmu_mode();
+
 	token = pmd_pgtable(*pmd);
 
 	do {
@@ -1652,6 +1721,8 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 			break;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
+	arch_leave_lazy_mmu_mode();
+
 	if (mm != &init_mm)
 		pte_unmap_unlock(pte-1, ptl);
 	return err;
@@ -1837,10 +1908,21 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * not dirty accountable.
 	 */
 	if (PageAnon(old_page)) {
-		if (trylock_page(old_page)) {
-			reuse = can_share_swap_page(old_page);
-			unlock_page(old_page);
+		if (!trylock_page(old_page)) {
+			page_cache_get(old_page);
+			pte_unmap_unlock(page_table, ptl);
+			lock_page(old_page);
+			page_table = pte_offset_map_lock(mm, pmd, address,
+							 &ptl);
+			if (!pte_same(*page_table, orig_pte)) {
+				unlock_page(old_page);
+				page_cache_release(old_page);
+				goto unlock;
+			}
+			page_cache_release(old_page);
 		}
+		reuse = reuse_swap_page(old_page);
+		unlock_page(old_page);
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
 		/*
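The do_wp_page() change above follows a common locking pattern: a sleeping lock (the page lock) must be taken while a non-sleeping lock (the pte lock) is already held, so the code first tries a non-blocking acquire and, if that fails, takes a reference, drops the pte lock, sleeps on the page lock, re-takes the pte lock, and revalidates with pte_same() that nothing changed in between. A rough userspace analogue with pthreads is sketched below, using hypothetical names and a generation counter in place of the pte_same() recheck; it illustrates the pattern only, not the kernel code.

	/* build: cc -pthread lock_pattern.c */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	static pthread_mutex_t pte_lock = PTHREAD_MUTEX_INITIALIZER;	/* short-lived, must not be held while blocking */
	static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;	/* may be held for a long time */
	static unsigned long generation;	/* bumped whenever the protected state changes */

	/* Called with pte_lock held; returns with both locks held. *still_valid
	 * tells the caller whether the state it saw survived any temporary unlock. */
	static void lock_page_carefully(unsigned long seen, bool *still_valid)
	{
		if (pthread_mutex_trylock(&page_lock) == 0) {
			*still_valid = true;			/* fast path: pte_lock never dropped */
			return;
		}
		pthread_mutex_unlock(&pte_lock);		/* don't block while holding it */
		pthread_mutex_lock(&page_lock);			/* now it is safe to sleep */
		pthread_mutex_lock(&pte_lock);			/* re-take the short-lived lock */
		*still_valid = (generation == seen);		/* analogue of the pte_same() recheck */
	}

	int main(void)
	{
		bool ok;

		pthread_mutex_lock(&pte_lock);
		lock_page_carefully(generation, &ok);
		printf("both locks held, state %s\n", ok ? "unchanged" : "changed, caller must retry");
		pthread_mutex_unlock(&pte_lock);
		pthread_mutex_unlock(&page_lock);
		return 0;
	}
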
@@ -1918,7 +2000,7 @@ gotten:
 	cow_user_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);
 
-	if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
+	if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
 		goto oom_free_new;
 
 	/*
@@ -1943,11 +2025,7 @@ gotten:
 	 * thread doing COW.
 	 */
 	ptep_clear_flush_notify(vma, address, page_table);
-	SetPageSwapBacked(new_page);
-	lru_cache_add_active_or_unevictable(new_page, vma);
 	page_add_new_anon_rmap(new_page, vma, address);
-
-	//TODO:  is this safe?  do_anonymous_page() does it this way.
 	set_pte_at(mm, address, page_table, entry);
 	update_mmu_cache(vma, address, entry);
 	if (old_page) {
@@ -1973,7 +2051,7 @@ gotten:
 		 * mapcount is visible. So transitively, TLBs to
 		 * old page will be flushed before it can be reused.
 		 */
-		page_remove_rmap(old_page, vma);
+		page_remove_rmap(old_page);
 	}
 
 	/* Free the old page.. */
@@ -2314,6 +2392,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	swp_entry_t entry;
 	pte_t pte;
+	struct mem_cgroup *ptr = NULL;
 	int ret = 0;
 
 	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
@@ -2352,7 +2431,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 
-	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
 		ret = VM_FAULT_OOM;
 		unlock_page(page);
 		goto out;
@@ -2370,22 +2449,35 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_nomap;
 	}
 
-	/* The page isn't present yet, go ahead with the fault. */
+	/*
+	 * The page isn't present yet, go ahead with the fault.
+	 *
+	 * Be careful about the sequence of operations here.
+	 * To get its accounting right, reuse_swap_page() must be called
+	 * while the page is counted on swap but not yet in mapcount i.e.
+	 * before page_add_anon_rmap() and swap_free(); try_to_free_swap()
+	 * must be called after the swap_free(), or it will never succeed.
+	 * Because delete_from_swap_page() may be called by reuse_swap_page(),
+	 * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry
+	 * in page->private. In this case, a record in swap_cgroup  is silently
+	 * discarded at swap_free().
+	 */
 
 	inc_mm_counter(mm, anon_rss);
 	pte = mk_pte(page, vma->vm_page_prot);
-	if (write_access && can_share_swap_page(page)) {
+	if (write_access && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		write_access = 0;
 	}
-
 	flush_icache_page(vma, page);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
+	/* It's better to call commit-charge after rmap is established */
+	mem_cgroup_commit_charge_swapin(page, ptr);
 
 	swap_free(entry);
 	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
-		remove_exclusive_swap_page(page);
+		try_to_free_swap(page);
 	unlock_page(page);
 
 	if (write_access) {
@@ -2402,7 +2494,7 @@ unlock:
 out:
 	return ret;
 out_nomap:
-	mem_cgroup_uncharge_page(page);
+	mem_cgroup_cancel_charge_swapin(ptr);
 	pte_unmap_unlock(page_table, ptl);
 	unlock_page(page);
 	page_cache_release(page);
@@ -2432,7 +2524,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto oom;
 	__SetPageUptodate(page);
 
-	if (mem_cgroup_charge(page, mm, GFP_KERNEL))
+	if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))
 		goto oom_free_page;
 
 	entry = mk_pte(page, vma->vm_page_prot);
@@ -2442,8 +2534,6 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_none(*page_table))
 		goto release;
 	inc_mm_counter(mm, anon_rss);
-	SetPageSwapBacked(page);
-	lru_cache_add_active_or_unevictable(page, vma);
 	page_add_new_anon_rmap(page, vma, address);
 	set_pte_at(mm, address, page_table, entry);
 
@@ -2525,7 +2615,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			ret = VM_FAULT_OOM;
 			goto out;
 		}
-		if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+		if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
 			ret = VM_FAULT_OOM;
 			page_cache_release(page);
 			goto out;
@@ -2591,8 +2681,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
-			SetPageSwapBacked(page);
-			lru_cache_add_active_or_unevictable(page, vma);
 			page_add_new_anon_rmap(page, vma, address);
 		} else {
 			inc_mm_counter(mm, file_rss);
@@ -2602,7 +2690,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 				get_page(dirty_page);
 			}
 		}
-		//TODO:  is this safe?  do_anonymous_page() does it this way.
 		set_pte_at(mm, address, page_table, entry);
 
 		/* no need to invalidate: a not-present page won't be cached */
@@ -2666,12 +2753,11 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
 		return 0;
 
-	if (unlikely(!(vma->vm_flags & VM_NONLINEAR) ||
-			!(vma->vm_flags & VM_CAN_NONLINEAR))) {
+	if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
 		/*
 		 * Page table corrupted: show pte and kill process.
 		 */
-		print_bad_pte(vma, orig_pte, address);
+		print_bad_pte(vma, address, orig_pte, NULL);
 		return VM_FAULT_OOM;
 	}
 
@@ -2953,7 +3039,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 {
 	resource_size_t phys_addr;
 	unsigned long prot = 0;
-	void *maddr;
+	void __iomem *maddr;
 	int offset = addr & (PAGE_SIZE-1);
 
 	if (follow_phys(vma, addr, write, &prot, &phys_addr))