Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	72
1 file changed, 48 insertions(+), 24 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index e11ca9dd823f..47fe250307c7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -69,6 +69,7 @@
 #include <linux/userfaultfd_k.h>
 #include <linux/dax.h>
 #include <linux/oom.h>
+#include <linux/numa.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
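
The new include provides the NUMA_NO_NODE constant used later in this patch in do_numa_page(). For reference, a sketch of the relevant definition (not part of the patch):

/* include/linux/numa.h */
#define NUMA_NO_NODE	(-1)
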
@@ -1451,7 +1452,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 	spinlock_t *ptl;
 
 	retval = -EINVAL;
-	if (PageAnon(page))
+	if (PageAnon(page) || PageSlab(page) || page_has_type(page))
 		goto out;
 	retval = -ENOMEM;
 	flush_dcache_page(page);
@@ -1503,6 +1504,8 @@ out:
  * under mm->mmap_sem write-lock, so it can change vma->vm_flags.
  * Caller must set VM_MIXEDMAP on vma if it wants to call this
  * function from other places, for example from page-fault handler.
+ *
+ * Return: %0 on success, negative error code otherwise.
  */
 int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 			struct page *page)
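
For context, a minimal sketch of how a driver's ->mmap handler might use vm_insert_page() and the negative error code that the new Return: line documents. struct my_dev, dev->pages and dev->npages are hypothetical; the pages would be individual alloc_page() allocations, per the restrictions in this comment block:

#include <linux/fs.h>
#include <linux/mm.h>

static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct my_dev *dev = file->private_data;	/* hypothetical driver state */
	unsigned long uaddr = vma->vm_start;
	int i, ret;

	for (i = 0; i < dev->npages && uaddr < vma->vm_end; i++) {
		ret = vm_insert_page(vma, uaddr, dev->pages[i]);
		if (ret)	/* %0 on success, negative errno otherwise */
			return ret;
		uaddr += PAGE_SIZE;
	}
	return 0;
}
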
@@ -1830,7 +1833,9 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
  * @size: size of map area
  * @prot: page protection flags for this mapping
  *
  * Note: this is only safe if the mm semaphore is held when called.
+ *
+ * Return: %0 on success, negative error code otherwise.
  */
 int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 		    unsigned long pfn, unsigned long size, pgprot_t prot)
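
Similarly, a minimal sketch of the usual remap_pfn_range() call from an ->mmap handler, where the mm semaphore is already held as the note requires; my_phys_base is a made-up device bus address:

static int my_io_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/* %0 on success, negative error code otherwise */
	return remap_pfn_range(vma, vma->vm_start, my_phys_base >> PAGE_SHIFT,
			       size, vma->vm_page_prot);
}
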
@@ -1903,6 +1908,8 @@ EXPORT_SYMBOL(remap_pfn_range);
  *
  * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
  * whatever write-combining details or similar.
+ *
+ * Return: %0 on success, negative error code otherwise.
  */
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len)
 {
@@ -2381,12 +2388,13 @@ oom:
  *
  * This function handles all that is needed to finish a write page fault in a
  * shared mapping due to PTE being read-only once the mapped page is prepared.
- * It handles locking of PTE and modifying it. The function returns
- * VM_FAULT_WRITE on success, 0 when PTE got changed before we acquired PTE
- * lock.
+ * It handles locking of PTE and modifying it.
  *
  * The function expects the page to be locked or other protection against
  * concurrent faults / writeback (such as DAX radix tree locks).
+ *
+ * Return: %VM_FAULT_WRITE on success, %0 when PTE got changed before
+ * we acquired PTE lock.
  */
 vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 {
@@ -2504,8 +2512,11 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	 * Take out anonymous pages first, anonymous shared vmas are
 	 * not dirty accountable.
 	 */
-	if (PageAnon(vmf->page) && !PageKsm(vmf->page)) {
+	if (PageAnon(vmf->page)) {
 		int total_map_swapcount;
+		if (PageKsm(vmf->page) && (PageSwapCache(vmf->page) ||
+					   page_count(vmf->page) != 1))
+			goto copy;
 		if (!trylock_page(vmf->page)) {
 			get_page(vmf->page);
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2520,6 +2531,15 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 			}
 			put_page(vmf->page);
 		}
+		if (PageKsm(vmf->page)) {
+			bool reused = reuse_ksm_page(vmf->page, vmf->vma,
+						     vmf->address);
+			unlock_page(vmf->page);
+			if (!reused)
+				goto copy;
+			wp_page_reuse(vmf);
+			return VM_FAULT_WRITE;
+		}
 		if (reuse_swap_page(vmf->page, &total_map_swapcount)) {
 			if (total_map_swapcount == 1) {
 				/*
@@ -2540,7 +2560,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 					(VM_WRITE|VM_SHARED))) {
 		return wp_page_shared(vmf);
 	}
-
+copy:
 	/*
 	 * Ok, we need to copy. Oh, well..
 	 */
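
Taken together, the three do_wp_page() hunks change how write faults on KSM pages are resolved; a condensed sketch of the resulting flow, paraphrasing the hunks above rather than quoting the kernel code verbatim:

	if (PageAnon(vmf->page)) {
		/* swap-cached or multiply-referenced KSM page: break COW */
		if (PageKsm(vmf->page) && (PageSwapCache(vmf->page) ||
					   page_count(vmf->page) != 1))
			goto copy;
		/* ... lock the page ... */
		if (PageKsm(vmf->page)) {
			/* only-PTE-mapped KSM page: try to reuse it in place */
			bool reused = reuse_ksm_page(vmf->page, vmf->vma,
						     vmf->address);
			unlock_page(vmf->page);
			if (!reused)
				goto copy;
			wp_page_reuse(vmf);
			return VM_FAULT_WRITE;
		}
		/* ... existing reuse_swap_page() path for other anon pages ... */
	}
	/* ... wp_page_shared() for shared writable mappings ... */
copy:
	/* allocate and map a private copy of the page (classic COW) */
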
@@ -3201,6 +3221,8 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
  *
  * Target users are page handler itself and implementations of
  * vm_ops->map_pages.
+ *
+ * Return: %0 on success, %VM_FAULT_ code in case of error.
  */
 vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page)
@@ -3261,11 +3283,12 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
  * This function handles all that is needed to finish a page fault once the
  * page to fault in is prepared. It handles locking of PTEs, inserts PTE for
  * given page, adds reverse page mapping, handles memcg charges and LRU
- * addition. The function returns 0 on success, VM_FAULT_ code in case of
- * error.
+ * addition.
  *
  * The function expects the page to be locked and on success it consumes a
  * reference of a page being mapped (for the PTE which maps it).
+ *
+ * Return: %0 on success, %VM_FAULT_ code in case of error.
  */
 vm_fault_t finish_fault(struct vm_fault *vmf)
 {
@@ -3321,12 +3344,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(fault_around_bytes_fops,
 
 static int __init fault_around_debugfs(void)
 {
-	void *ret;
-
-	ret = debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL,
-			&fault_around_bytes_fops);
-	if (!ret)
-		pr_warn("Failed to create fault_around_bytes in debugfs");
+	debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL,
+				   &fault_around_bytes_fops);
 	return 0;
 }
 late_initcall(fault_around_debugfs);
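
The debugfs hunk removes dead error handling: debugfs creation failures are not fatal, so callers are no longer expected to check the result of debugfs_create_file_unsafe(). A minimal sketch of the same idiom in any other initcall; my_knob and my_knob_fops are hypothetical:

static int __init my_debugfs_init(void)
{
	/* return value intentionally ignored; debugfs failure is not fatal */
	debugfs_create_file_unsafe("my_knob", 0644, NULL, NULL, &my_knob_fops);
	return 0;
}
late_initcall(my_debugfs_init);
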
@@ -3517,10 +3536,13 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
  * but allow concurrent faults).
  * The mmap_sem may have been released depending on flags and our
  * return value. See filemap_fault() and __lock_page_or_retry().
+ * If mmap_sem is released, vma may become invalid (for example
+ * by other thread calling munmap()).
  */
 static vm_fault_t do_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
+	struct mm_struct *vm_mm = vma->vm_mm;
 	vm_fault_t ret;
 
 	/*
@@ -3561,7 +3583,7 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
 
 	/* preallocated pagetable is unused: free it */
 	if (vmf->prealloc_pte) {
-		pte_free(vma->vm_mm, vmf->prealloc_pte);
+		pte_free(vm_mm, vmf->prealloc_pte);
 		vmf->prealloc_pte = NULL;
 	}
 	return ret;
@@ -3586,11 +3608,11 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL;
-	int page_nid = -1;
+	int page_nid = NUMA_NO_NODE;
 	int last_cpupid;
 	int target_nid;
 	bool migrated = false;
-	pte_t pte;
+	pte_t pte, old_pte;
 	bool was_writable = pte_savedwrite(vmf->orig_pte);
 	int flags = 0;
 
@@ -3610,12 +3632,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 * Make it present again, Depending on how arch implementes non
 	 * accessible ptes, some can allow access by kernel mode.
 	 */
-	pte = ptep_modify_prot_start(vma->vm_mm, vmf->address, vmf->pte);
-	pte = pte_modify(pte, vma->vm_page_prot);
+	old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
+	pte = pte_modify(old_pte, vma->vm_page_prot);
 	pte = pte_mkyoung(pte);
 	if (was_writable)
 		pte = pte_mkwrite(pte);
-	ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte);
+	ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
 	update_mmu_cache(vma, vmf->address, vmf->pte);
 
 	page = vm_normal_page(vma, vmf->address, pte);
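
The hunk above also reflects a prototype change in the ptep_modify_prot_*() helpers: both now take the VMA rather than the mm, and the commit side receives the original PTE value returned by the start side. A sketch of the new calling convention, taken from the calls in this diff (addr and ptep stand for the faulting address and PTE pointer):

	pte_t old_pte, pte;

	old_pte = ptep_modify_prot_start(vma, addr, ptep);	/* open the update */
	pte = pte_modify(old_pte, vma->vm_page_prot);		/* build the new value */
	pte = pte_mkyoung(pte);
	ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);	/* publish it */
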
@@ -3653,7 +3675,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid,
 			&flags);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
-	if (target_nid == -1) {
+	if (target_nid == NUMA_NO_NODE) {
 		put_page(page);
 		goto out;
 	}
@@ -3667,7 +3689,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 		flags |= TNF_MIGRATE_FAIL;
 
 out:
-	if (page_nid != -1)
+	if (page_nid != NUMA_NO_NODE)
 		task_numa_fault(last_cpupid, page_nid, 1, flags);
 	return 0;
 }
@@ -4150,7 +4172,7 @@ EXPORT_SYMBOL(follow_pte_pmd);
  *
  * Only IO mappings and raw PFN mappings are allowed.
  *
- * Returns zero and the pfn at @pfn on success, -ve otherwise.
+ * Return: zero and the pfn at @pfn on success, -ve otherwise.
  */
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	unsigned long *pfn)
@@ -4300,6 +4322,8 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
  * @gup_flags: flags modifying lookup behaviour
  *
  * The caller must hold a reference on @mm.
+ *
+ * Return: number of bytes copied from source to destination.
  */
 int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 	void *buf, int len, unsigned int gup_flags)
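
With the Return: line above, callers can rely on access_remote_vm() returning the number of bytes actually copied. A short usage sketch, assuming mm was obtained and referenced elsewhere (for example via get_task_mm()) and that FOLL_FORCE is the desired lookup behaviour:

	char buf[64];
	int copied;

	copied = access_remote_vm(mm, addr, buf, sizeof(buf), FOLL_FORCE);
	if (copied < (int)sizeof(buf))
		pr_debug("short read from remote mm: %d bytes\n", copied);
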