Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	72
1 file changed, 48 insertions(+), 24 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index e11ca9dd823f..47fe250307c7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -69,6 +69,7 @@
 #include <linux/userfaultfd_k.h>
 #include <linux/dax.h>
 #include <linux/oom.h>
+#include <linux/numa.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -1451,7 +1452,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 	spinlock_t *ptl;
 
 	retval = -EINVAL;
-	if (PageAnon(page))
+	if (PageAnon(page) || PageSlab(page) || page_has_type(page))
 		goto out;
 	retval = -ENOMEM;
 	flush_dcache_page(page);
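
The widened check keeps slab pages and type-tagged pages out of user page tables alongside anonymous ones: all three repurpose struct page fields (anon rmap in ->mapping, slab metadata, or the page_type word), so letting a driver map them to userspace would corrupt that state. A minimal sketch of the predicate as a standalone helper; the helper name is hypothetical and not part of this patch:

    /* Hypothetical helper, equivalent to the check added above: pages
     * whose struct page fields are repurposed (anonymous rmap, slab
     * metadata, or a page_type such as PG_buddy/PG_table) must never
     * be inserted into a user page table. */
    static bool page_never_mappable(struct page *page)
    {
    	return PageAnon(page) || PageSlab(page) || page_has_type(page);
    }
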
@@ -1503,6 +1504,8 @@ out:
  * under mm->mmap_sem write-lock, so it can change vma->vm_flags.
  * Caller must set VM_MIXEDMAP on vma if it wants to call this
  * function from other places, for example from page-fault handler.
+ *
+ * Return: %0 on success, negative error code otherwise.
  */
 int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 			struct page *page)
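
The new Return: line documents what callers already rely on. A typical caller is an f_op->mmap handler exposing a single kernel-allocated page; a hedged sketch, where my_priv and its page field are hypothetical names:

    /* Hypothetical driver mmap handler; priv->page was allocated with
     * alloc_page(GFP_KERNEL). vm_insert_page() returns 0 or a negative
     * errno, exactly as the kernel-doc above now states. */
    static int my_mmap(struct file *file, struct vm_area_struct *vma)
    {
    	struct my_priv *priv = file->private_data;
    
    	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
    		return -EINVAL;
    	return vm_insert_page(vma, vma->vm_start, priv->page);
    }
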
@@ -1830,7 +1833,9 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
  * @size: size of map area
  * @prot: page protection flags for this mapping
  *
  * Note: this is only safe if the mm semaphore is held when called.
+ *
+ * Return: %0 on success, negative error code otherwise.
  */
 int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 		    unsigned long pfn, unsigned long size, pgprot_t prot)
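
Again the Return: line matches long-standing behaviour. For reference, the usual call shape in a driver mmap handler looks like this sketch; MY_PHYS_BASE is a hypothetical device address, the rest is the standard idiom:

    /* Hypothetical mmap handler mapping a physical region to userspace;
     * remap_pfn_range() returns 0 or a negative errno. */
    static int my_mmap(struct file *file, struct vm_area_struct *vma)
    {
    	unsigned long pfn = (MY_PHYS_BASE >> PAGE_SHIFT) + vma->vm_pgoff;
    
    	return remap_pfn_range(vma, vma->vm_start, pfn,
    			       vma->vm_end - vma->vm_start,
    			       vma->vm_page_prot);
    }
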
@@ -1903,6 +1908,8 @@ EXPORT_SYMBOL(remap_pfn_range);
  *
  * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
  * whatever write-combining details or similar.
+ *
+ * Return: %0 on success, negative error code otherwise.
  */
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len)
 {
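
vm_iomap_memory() is the convenience wrapper around remap_pfn_range() that takes a physical base and length and performs the size and alignment checks itself, so the documented 0-or-negative return is usually consumed directly, as in this sketch (pdev being a hypothetical struct pci_dev pointer):

    /* Hypothetical: expose PCI BAR 0 to userspace from an mmap handler. */
    return vm_iomap_memory(vma, pci_resource_start(pdev, 0),
    		       pci_resource_len(pdev, 0));
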
@@ -2381,12 +2388,13 @@ oom:
  *
  * This function handles all that is needed to finish a write page fault in a
  * shared mapping due to PTE being read-only once the mapped page is prepared.
- * It handles locking of PTE and modifying it. The function returns
- * VM_FAULT_WRITE on success, 0 when PTE got changed before we acquired PTE
- * lock.
+ * It handles locking of PTE and modifying it.
  *
  * The function expects the page to be locked or other protection against
  * concurrent faults / writeback (such as DAX radix tree locks).
+ *
+ * Return: %VM_FAULT_WRITE on success, %0 when PTE got changed before
+ * we acquired PTE lock.
  */
 vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 {
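
The relocated return-value text is easiest to read against a caller; a condensed sketch modelled on wp_pfn_shared() in this same file, where %0 simply ends the fault and %VM_FAULT_WRITE reports a successful in-place write enable:

    /* Sketch modelled on wp_pfn_shared(): let the driver prepare the
     * PFN for writing, then retake the PTE lock and upgrade the entry. */
    pte_unmap_unlock(vmf->pte, vmf->ptl);
    vmf->flags |= FAULT_FLAG_MKWRITE;
    ret = vmf->vma->vm_ops->pfn_mkwrite(vmf);
    if (ret & ~(VM_FAULT_WRITE | VM_FAULT_ERROR))
    	return ret;
    return finish_mkwrite_fault(vmf);	/* %0 or %VM_FAULT_WRITE */
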
@@ -2504,8 +2512,11 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	 * Take out anonymous pages first, anonymous shared vmas are
 	 * not dirty accountable.
 	 */
-	if (PageAnon(vmf->page) && !PageKsm(vmf->page)) {
+	if (PageAnon(vmf->page)) {
 		int total_map_swapcount;
+		if (PageKsm(vmf->page) && (PageSwapCache(vmf->page) ||
+					   page_count(vmf->page) != 1))
+			goto copy;
 		if (!trylock_page(vmf->page)) {
 			get_page(vmf->page);
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2520,6 +2531,15 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 			}
 			put_page(vmf->page);
 		}
+		if (PageKsm(vmf->page)) {
+			bool reused = reuse_ksm_page(vmf->page, vmf->vma,
+						     vmf->address);
+			unlock_page(vmf->page);
+			if (!reused)
+				goto copy;
+			wp_page_reuse(vmf);
+			return VM_FAULT_WRITE;
+		}
 		if (reuse_swap_page(vmf->page, &total_map_swapcount)) {
 			if (total_map_swapcount == 1) {
 				/*
@@ -2540,7 +2560,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 					(VM_WRITE|VM_SHARED))) {
 		return wp_page_shared(vmf);
 	}
-
+copy:
 	/*
 	 * Ok, we need to copy. Oh, well..
 	 */
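
Taken together, the three hunks above split KSM handling in do_wp_page() in two: a KSM page that is in the swap cache or has extra references is sent to the new copy: label before the page lock is even taken, while an only-PTE-mapped one is handed to reuse_ksm_page() under lock. The helper's contract, as assumed from this call site (the function is added to mm/ksm.c by the same series; prototype only, not the implementation):

    /* Returns true when the locked KSM page may be reused for the write
     * in place; false means fall through to "copy:" and COW a new page. */
    bool reuse_ksm_page(struct page *page, struct vm_area_struct *vma,
    		    unsigned long address);
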
@@ -3201,6 +3221,8 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
  *
  * Target users are page handler itself and implementations of
  * vm_ops->map_pages.
+ *
+ * Return: %0 on success, %VM_FAULT_ code in case of error.
  */
 vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page)
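
A condensed sketch of how a vm_ops->map_pages implementation consumes this, modelled on filemap_map_pages(): a non-zero return (a %VM_FAULT_ code, per the new doc line) just means the speculative entry is skipped:

    /* Page is locked and known up to date at this point. */
    if (alloc_set_pte(vmf, NULL, page))
    	goto unlock;	/* PTE already set or a THP is in the way */
    unlock_page(page);
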
@@ -3261,11 +3283,12 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
  * This function handles all that is needed to finish a page fault once the
  * page to fault in is prepared. It handles locking of PTEs, inserts PTE for
  * given page, adds reverse page mapping, handles memcg charges and LRU
- * addition. The function returns 0 on success, VM_FAULT_ code in case of
- * error.
+ * addition.
  *
  * The function expects the page to be locked and on success it consumes a
  * reference of a page being mapped (for the PTE which maps it).
+ *
+ * Return: %0 on success, %VM_FAULT_ code in case of error.
  */
 vm_fault_t finish_fault(struct vm_fault *vmf)
 {
@@ -3321,12 +3344,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(fault_around_bytes_fops,
 
 static int __init fault_around_debugfs(void)
 {
-	void *ret;
-
-	ret = debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL,
-			&fault_around_bytes_fops);
-	if (!ret)
-		pr_warn("Failed to create fault_around_bytes in debugfs");
+	debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL,
+				   &fault_around_bytes_fops);
 	return 0;
 }
 late_initcall(fault_around_debugfs);
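
This follows the kernel-wide rule that debugfs is best-effort: creation failures must not change behaviour, so checking the return value of debugfs_create_*() only adds dead code. The resulting idiom, in a hypothetical init sketch:

    /* Hypothetical: ignore debugfs return values; the code has to work
     * whether or not the files appear. */
    static int __init my_debugfs_init(void)
    {
    	struct dentry *dir = debugfs_create_dir("my_driver", NULL);
    
    	debugfs_create_file("state", 0444, dir, NULL, &my_state_fops);
    	return 0;
    }
    late_initcall(my_debugfs_init);
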
@@ -3517,10 +3536,13 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
  * but allow concurrent faults).
  * The mmap_sem may have been released depending on flags and our
  * return value. See filemap_fault() and __lock_page_or_retry().
+ * If mmap_sem is released, vma may become invalid (for example
+ * by other thread calling munmap()).
  */
 static vm_fault_t do_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
+	struct mm_struct *vm_mm = vma->vm_mm;
 	vm_fault_t ret;
 
 	/*
@@ -3561,7 +3583,7 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
 
 	/* preallocated pagetable is unused: free it */
 	if (vmf->prealloc_pte) {
-		pte_free(vma->vm_mm, vmf->prealloc_pte);
+		pte_free(vm_mm, vmf->prealloc_pte);
 		vmf->prealloc_pte = NULL;
 	}
 	return ret;
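
These two hunks fix a use-after-free: vma->vm_ops->fault handlers may drop mmap_sem, after which another thread can munmap() the range and free the vma, so vma->vm_mm must be read before the handler runs. The general pattern, with hypothetical names:

    /* Copy what you need out of *vma while it is known valid ... */
    struct mm_struct *mm = vma->vm_mm;
    
    ret = handler_that_may_release_mmap_sem(vmf);
    /* ... vma may have been freed here; only the cached mm is safe. */
    pte_free(mm, vmf->prealloc_pte);
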
@@ -3586,11 +3608,11 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL;
-	int page_nid = -1;
+	int page_nid = NUMA_NO_NODE;
 	int last_cpupid;
 	int target_nid;
 	bool migrated = false;
-	pte_t pte;
+	pte_t pte, old_pte;
 	bool was_writable = pte_savedwrite(vmf->orig_pte);
 	int flags = 0;
 
@@ -3610,12 +3632,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 * Make it present again, Depending on how arch implementes non
 	 * accessible ptes, some can allow access by kernel mode.
 	 */
-	pte = ptep_modify_prot_start(vma->vm_mm, vmf->address, vmf->pte);
-	pte = pte_modify(pte, vma->vm_page_prot);
+	old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
+	pte = pte_modify(old_pte, vma->vm_page_prot);
 	pte = pte_mkyoung(pte);
 	if (was_writable)
 		pte = pte_mkwrite(pte);
-	ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte);
+	ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
 	update_mmu_cache(vma, vmf->address, vmf->pte);
 
 	page = vm_normal_page(vma, vmf->address, pte);
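
The start/commit helpers now take the vma rather than the mm, and the commit side additionally receives the value that start returned, letting architectures treat the prot update as a transaction. The signatures this hunk codes against, as inferred from the call sites (the helpers themselves live in the arch and asm-generic pgtable headers):

    pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
    			     unsigned long addr, pte_t *ptep);
    void ptep_modify_prot_commit(struct vm_area_struct *vma,
    			     unsigned long addr, pte_t *ptep,
    			     pte_t old_pte, pte_t new_pte);
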
@@ -3653,7 +3675,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid,
 			&flags);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
-	if (target_nid == -1) {
+	if (target_nid == NUMA_NO_NODE) {
 		put_page(page);
 		goto out;
 	}
@@ -3667,7 +3689,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 		flags |= TNF_MIGRATE_FAIL;
 
 out:
-	if (page_nid != -1)
+	if (page_nid != NUMA_NO_NODE)
 		task_numa_fault(last_cpupid, page_nid, 1, flags);
 	return 0;
 }
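
The three -1 substitutions in this function are purely cosmetic; NUMA_NO_NODE is the named form of the same sentinel, provided by the <linux/numa.h> include added at the top of the file:

    /* include/linux/numa.h */
    #define NUMA_NO_NODE	(-1)
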
@@ -4150,7 +4172,7 @@ EXPORT_SYMBOL(follow_pte_pmd);
  *
  * Only IO mappings and raw PFN mappings are allowed.
  *
- * Returns zero and the pfn at @pfn on success, -ve otherwise.
+ * Return: zero and the pfn at @pfn on success, -ve otherwise.
  */
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	unsigned long *pfn)
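
Usage for context, since only the doc formatting changes: the caller supplies a VM_IO/VM_PFNMAP vma and must hold mmap_sem; a hedged sketch:

    unsigned long pfn;
    
    if (follow_pfn(vma, address, &pfn))
    	return -EFAULT;	/* negative return: not a PFN mapping here */
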
@@ -4300,6 +4322,8 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
  * @gup_flags: flags modifying lookup behaviour
  *
  * The caller must hold a reference on @mm.
+ *
+ * Return: number of bytes copied from source to destination.
  */
 int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 		void *buf, int len, unsigned int gup_flags)
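
The new Return: line is worth emphasising because the count may be short and is not an errno. A hedged caller sketch in the style of the /proc code:

    /* Read another task's memory; copied is the byte count actually
     * transferred (possibly 0 or short), never a negative error. */
    struct mm_struct *mm = get_task_mm(task);
    int copied = 0;
    
    if (mm) {
    	copied = access_remote_vm(mm, addr, buf, len, FOLL_FORCE);
    	mmput(mm);
    }
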