Diffstat (limited to 'mm')
-rw-r--r--	mm/Kconfig		 5
-rw-r--r--	mm/cma.c		15
-rw-r--r--	mm/gup.c		 2
-rw-r--r--	mm/memcontrol.c		22
-rw-r--r--	mm/memory_hotplug.c	64
-rw-r--r--	mm/nommu.c		 4
-rw-r--r--	mm/page_alloc.c		 4
-rw-r--r--	mm/shmem.c		 4
-rw-r--r--	mm/slab_common.c	 3
-rw-r--r--	mm/util.c		75
-rw-r--r--	mm/vmscan.c		44
-rw-r--r--	mm/z3fold.c		29
12 files changed, 206 insertions, 65 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 495d7368ced8..56cec636a1fc 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -649,8 +649,7 @@ config IDLE_PAGE_TRACKING
 	  See Documentation/admin-guide/mm/idle_page_tracking.rst for
 	  more details.
 
-# arch_add_memory() comprehends device memory
-config ARCH_HAS_ZONE_DEVICE
+config ARCH_HAS_PTE_DEVMAP
 	bool
 
 config ZONE_DEVICE
@@ -658,7 +657,7 @@ config ZONE_DEVICE
 	depends on MEMORY_HOTPLUG
 	depends on MEMORY_HOTREMOVE
 	depends on SPARSEMEM_VMEMMAP
-	depends on ARCH_HAS_ZONE_DEVICE
+	depends on ARCH_HAS_PTE_DEVMAP
 	select XARRAY_MULTI
 
 	help
diff --git a/mm/cma.c b/mm/cma.c
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -278,6 +278,12 @@ int __init cma_declare_contiguous(phys_addr_t base,
 	 */
 	alignment = max(alignment, (phys_addr_t)PAGE_SIZE <<
 			max_t(unsigned long, MAX_ORDER - 1, pageblock_order));
+	if (fixed && base & (alignment - 1)) {
+		ret = -EINVAL;
+		pr_err("Region at %pa must be aligned to %pa bytes\n",
+			&base, &alignment);
+		goto err;
+	}
 	base = ALIGN(base, alignment);
 	size = ALIGN(size, alignment);
 	limit &= ~(alignment - 1);
@@ -308,6 +314,13 @@ int __init cma_declare_contiguous(phys_addr_t base,
 	if (limit == 0 || limit > memblock_end)
 		limit = memblock_end;
 
+	if (base + size > limit) {
+		ret = -EINVAL;
+		pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n",
+			&size, &base, &limit);
+		goto err;
+	}
+
 	/* Reserve memory */
 	if (fixed) {
 		if (memblock_is_region_reserved(base, size) ||
@@ -494,7 +507,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
  * @pages: Allocated pages.
  * @count: Number of allocated pages.
  *
- * This function releases memory allocated by alloc_cma().
+ * This function releases memory allocated by cma_alloc().
  * It returns false when provided pages do not belong to contiguous area and
  * true otherwise.
  */
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1895,7 +1895,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 }
 #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
 
-#if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
 static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 		unsigned long end, struct page **pages, int *nr)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 249671873aa9..cdbb7a84cb6e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -695,12 +695,15 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
 	if (mem_cgroup_disabled())
 		return;
 
-	__this_cpu_add(memcg->vmstats_local->stat[idx], val);
-
 	x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
 	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup *mi;
 
+		/*
+		 * Batch local counters to keep them in sync with
+		 * the hierarchical ones.
+		 */
+		__this_cpu_add(memcg->vmstats_local->stat[idx], x);
 		for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
 			atomic_long_add(x, &mi->vmstats[idx]);
 		x = 0;
@@ -749,13 +752,15 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 	/* Update memcg */
 	__mod_memcg_state(memcg, idx, val);
 
-	/* Update lruvec */
-	__this_cpu_add(pn->lruvec_stat_local->count[idx], val);
-
 	x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
 	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup_per_node *pi;
 
+		/*
+		 * Batch local counters to keep them in sync with
+		 * the hierarchical ones.
+		 */
+		__this_cpu_add(pn->lruvec_stat_local->count[idx], x);
 		for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id))
 			atomic_long_add(x, &pi->lruvec_stat[idx]);
 		x = 0;
@@ -777,12 +782,15 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 	if (mem_cgroup_disabled())
 		return;
 
-	__this_cpu_add(memcg->vmstats_local->events[idx], count);
-
 	x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]);
 	if (unlikely(x > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup *mi;
 
+		/*
+		 * Batch local counters to keep them in sync with
+		 * the hierarchical ones.
+		 */
+		__this_cpu_add(memcg->vmstats_local->events[idx], x);
 		for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
 			atomic_long_add(x, &mi->vmevents[idx]);
 		x = 0;
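Reviewer note, not part of the patch: the three hunks above stop updating the "local" counter on every call and instead fold it into the same batched flush as the hierarchical counters. Below is a minimal, self-contained userspace C sketch of that batching pattern; the names (struct counter, BATCH, pending/local/hierarchical) are invented for illustration, and the kernel code uses per-cpu variables and atomics rather than plain longs.

#include <stdlib.h>

#define BATCH 32                    /* stands in for MEMCG_CHARGE_BATCH */

struct counter {
	long pending;               /* stands in for the per-cpu delta */
	long local;                 /* stands in for vmstats_local */
	long hierarchical;          /* stands in for the atomic vmstats */
};

static void counter_add(struct counter *c, long val)
{
	long x = val + c->pending;

	if (labs(x) > BATCH) {
		/* flush local and hierarchical together so they stay in sync */
		c->local += x;
		c->hierarchical += x;
		x = 0;
	}
	c->pending = x;
}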
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6166ba5a15f3..4ebe696138e8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1734,9 +1734,10 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
 		endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
 		pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
 			&beginpa, &endpa);
-	}
 
-	return ret;
+		return -EBUSY;
+	}
+	return 0;
 }
 
 static int check_cpu_on_node(pg_data_t *pgdat)
@@ -1819,19 +1820,9 @@ static void __release_memory_resource(resource_size_t start,
 	}
 }
 
-/**
- * remove_memory
- * @nid: the node ID
- * @start: physical address of the region to remove
- * @size: size of the region to remove
- *
- * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
- * and online/offline operations before this call, as required by
- * try_offline_node().
- */
-void __ref __remove_memory(int nid, u64 start, u64 size)
+static int __ref try_remove_memory(int nid, u64 start, u64 size)
 {
-	int ret;
+	int rc = 0;
 
 	BUG_ON(check_hotplug_memory_range(start, size));
 
@@ -1839,13 +1830,13 @@ void __ref __remove_memory(int nid, u64 start, u64 size)
 
 	/*
 	 * All memory blocks must be offlined before removing memory. Check
-	 * whether all memory blocks in question are offline and trigger a BUG()
+	 * whether all memory blocks in question are offline and return error
 	 * if this is not the case.
 	 */
-	ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
+	rc = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
 		check_memblock_offlined_cb);
-	if (ret)
-		BUG();
+	if (rc)
+		goto done;
 
 	/* remove memmap entry */
 	firmware_map_remove(start, start + size, "System RAM");
@@ -1857,14 +1848,45 @@ void __ref __remove_memory(int nid, u64 start, u64 size)
 
 	try_offline_node(nid);
 
+done:
 	mem_hotplug_done();
+	return rc;
 }
 
-void remove_memory(int nid, u64 start, u64 size)
+/**
+ * remove_memory
+ * @nid: the node ID
+ * @start: physical address of the region to remove
+ * @size: size of the region to remove
+ *
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations before this call, as required by
+ * try_offline_node().
+ */
+void __remove_memory(int nid, u64 start, u64 size)
+{
+
+	/*
+	 * trigger BUG() if some memory is not offlined prior to calling this
+	 * function
+	 */
+	if (try_remove_memory(nid, start, size))
+		BUG();
+}
+
+/*
+ * Remove memory if every memory block is offline, otherwise return -EBUSY if
+ * some memory is not offline
+ */
+int remove_memory(int nid, u64 start, u64 size)
 {
+	int rc;
+
 	lock_device_hotplug();
-	__remove_memory(nid, start, size);
+	rc = try_remove_memory(nid, start, size);
 	unlock_device_hotplug();
+
+	return rc;
 }
 EXPORT_SYMBOL_GPL(remove_memory);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
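Reviewer note, not part of the patch: the hunks above split the old void __remove_memory() into try_remove_memory() plus a BUG()-ing wrapper, and make remove_memory() return -EBUSY when any memory block in the range is still online instead of crashing. A hypothetical caller sketch of the new return value is below; example_unplug() is an invented name, and only remove_memory() itself comes from this patch (its declaration lives in linux/memory_hotplug.h, as far as I can tell).

#include <linux/memory_hotplug.h>

static void example_unplug(int nid, u64 start, u64 size)
{
	int rc = remove_memory(nid, start, size);

	if (rc == -EBUSY) {
		/* at least one memory block is still online; retry later */
		return;
	}
	WARN_ON(rc);	/* any other error is unexpected here */
}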
diff --git a/mm/nommu.c b/mm/nommu.c
index eb3e2e558da1..fed1b6e9c89b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1261,7 +1261,9 @@ unsigned long do_mmap(struct file *file,
 	add_nommu_region(region);
 
 	/* clear anonymous mappings that don't ask for uninitialized data */
-	if (!vma->vm_file && !(flags & MAP_UNINITIALIZED))
+	if (!vma->vm_file &&
+	    (!IS_ENABLED(CONFIG_MMAP_ALLOW_UNINITIALIZED) ||
+	     !(flags & MAP_UNINITIALIZED)))
 		memset((void *)region->vm_start, 0,
 		       region->vm_end - region->vm_start);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8fd7f45a04eb..e515bfcf7f28 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4102,7 +4102,6 @@ static int
 __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 		  const struct alloc_context *ac)
 {
-	struct reclaim_state reclaim_state;
 	int progress;
 	unsigned int noreclaim_flag;
 	unsigned long pflags;
@@ -4114,13 +4113,10 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 	psi_memstall_enter(&pflags);
 	fs_reclaim_acquire(gfp_mask);
 	noreclaim_flag = memalloc_noreclaim_save();
-	reclaim_state.reclaimed_slab = 0;
-	current->reclaim_state = &reclaim_state;
 
 	progress = try_to_free_pages(ac->zonelist, order, gfp_mask,
 						ac->nodemask);
 
-	current->reclaim_state = NULL;
 	memalloc_noreclaim_restore(noreclaim_flag);
 	fs_reclaim_release(gfp_mask);
 	psi_memstall_leave(&pflags);
diff --git a/mm/shmem.c b/mm/shmem.c
index f4dce9c8670d..99497cb32e71 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -400,7 +400,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 
 static int shmem_huge __read_mostly;
 
-#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
+#if defined(CONFIG_SYSFS)
 static int shmem_parse_huge(const char *str)
 {
 	if (!strcmp(str, "never"))
@@ -417,7 +417,9 @@ static int shmem_parse_huge(const char *str)
 		return SHMEM_HUGE_FORCE;
 	return -EINVAL;
 }
+#endif
 
+#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
 static const char *shmem_format_huge(int huge)
 {
 	switch (huge) {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 6c49dbb3769e..807490fe217a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1028,7 +1028,8 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name,
 }
 
 struct kmem_cache *
-kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
+kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
+{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ };
 EXPORT_SYMBOL(kmalloc_caches);
 
 /*
diff --git a/mm/util.c b/mm/util.c
--- a/mm/util.c
+++ b/mm/util.c
@@ -7,6 +7,7 @@
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
 #include <linux/sched/task_stack.h>
 #include <linux/security.h>
 #include <linux/swap.h>
@@ -300,6 +301,80 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 }
 #endif
 
+/**
+ * __account_locked_vm - account locked pages to an mm's locked_vm
+ * @mm: mm to account against
+ * @pages: number of pages to account
+ * @inc: %true if @pages should be considered positive, %false if not
+ * @task: task used to check RLIMIT_MEMLOCK
+ * @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped
+ *
+ * Assumes @task and @mm are valid (i.e. at least one reference on each), and
+ * that mmap_sem is held as writer.
+ *
+ * Return:
+ * * 0 on success
+ * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
+ */
+int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
+			struct task_struct *task, bool bypass_rlim)
+{
+	unsigned long locked_vm, limit;
+	int ret = 0;
+
+	lockdep_assert_held_write(&mm->mmap_sem);
+
+	locked_vm = mm->locked_vm;
+	if (inc) {
+		if (!bypass_rlim) {
+			limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+			if (locked_vm + pages > limit)
+				ret = -ENOMEM;
+		}
+		if (!ret)
+			mm->locked_vm = locked_vm + pages;
+	} else {
+		WARN_ON_ONCE(pages > locked_vm);
+		mm->locked_vm = locked_vm - pages;
+	}
+
+	pr_debug("%s: [%d] caller %ps %c%lu %lu/%lu%s\n", __func__, task->pid,
+		 (void *)_RET_IP_, (inc) ? '+' : '-', pages << PAGE_SHIFT,
+		 locked_vm << PAGE_SHIFT, task_rlimit(task, RLIMIT_MEMLOCK),
+		 ret ? " - exceeded" : "");
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__account_locked_vm);
+
+/**
+ * account_locked_vm - account locked pages to an mm's locked_vm
+ * @mm: mm to account against, may be NULL
+ * @pages: number of pages to account
+ * @inc: %true if @pages should be considered positive, %false if not
+ *
+ * Assumes a non-NULL @mm is valid (i.e. at least one reference on it).
+ *
+ * Return:
+ * * 0 on success, or if mm is NULL
+ * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
+ */
+int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc)
+{
+	int ret;
+
+	if (pages == 0 || !mm)
+		return 0;
+
+	down_write(&mm->mmap_sem);
+	ret = __account_locked_vm(mm, pages, inc, current,
+				  capable(CAP_IPC_LOCK));
+	up_write(&mm->mmap_sem);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(account_locked_vm);
+
 unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
 	unsigned long flag, unsigned long pgoff)
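Reviewer note, not part of the patch: a hypothetical driver-style sketch of how the new account_locked_vm() helper pairs a charge with the matching uncharge. example_pin_charge() and example_unpin_uncharge() are invented names; only account_locked_vm() comes from the hunk above (I believe its declaration is added to linux/mm.h), and the helper takes mmap_sem itself.

#include <linux/mm.h>

static int example_pin_charge(struct mm_struct *mm, unsigned long npages)
{
	int ret;

	/* charge npages against RLIMIT_MEMLOCK (bypassed for CAP_IPC_LOCK) */
	ret = account_locked_vm(mm, npages, true);
	if (ret)
		return ret;	/* -ENOMEM: the limit would be exceeded */

	/* ... pin the pages here; on failure, undo the charge ... */

	return 0;
}

static void example_unpin_uncharge(struct mm_struct *mm, unsigned long npages)
{
	/* ... unpin the pages here ... */
	account_locked_vm(mm, npages, false);
}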
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f8e3dcd527b8..44df66a98f2a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -131,6 +131,9 @@ struct scan_control {
 		unsigned int file_taken;
 		unsigned int taken;
 	} nr;
+
+	/* for recording the reclaimed slab by now */
+	struct reclaim_state reclaim_state;
 };
 
 #ifdef ARCH_HAS_PREFETCH
@@ -238,6 +241,18 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
+static void set_task_reclaim_state(struct task_struct *task,
+				   struct reclaim_state *rs)
+{
+	/* Check for an overwrite */
+	WARN_ON_ONCE(rs && task->reclaim_state);
+
+	/* Check for the nulling of an already-nulled member */
+	WARN_ON_ONCE(!rs && !task->reclaim_state);
+
+	task->reclaim_state = rs;
+}
+
 #ifdef CONFIG_MEMCG
 static bool global_reclaim(struct scan_control *sc)
 {
@@ -3191,11 +3206,13 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 	if (throttle_direct_reclaim(sc.gfp_mask, zonelist, nodemask))
 		return 1;
 
+	set_task_reclaim_state(current, &sc.reclaim_state);
 	trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask);
 
 	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
 	trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
+	set_task_reclaim_state(current, NULL);
 
 	return nr_reclaimed;
 }
@@ -3218,6 +3235,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 	};
 	unsigned long lru_pages;
 
+	set_task_reclaim_state(current, &sc.reclaim_state);
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 
@@ -3235,7 +3253,9 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 
 	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
+	set_task_reclaim_state(current, NULL);
 	*nr_scanned = sc.nr_scanned;
+
 	return sc.nr_reclaimed;
 }
 
@@ -3262,6 +3282,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 		.may_shrinkslab = 1,
 	};
 
+	set_task_reclaim_state(current, &sc.reclaim_state);
 	/*
 	 * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
 	 * take care of from where we get pages. So the node where we start the
@@ -3282,6 +3303,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 	psi_memstall_leave(&pflags);
 
 	trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
+	set_task_reclaim_state(current, NULL);
 
 	return nr_reclaimed;
 }
@@ -3483,6 +3505,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 		.may_unmap = 1,
 	};
 
+	set_task_reclaim_state(current, &sc.reclaim_state);
 	psi_memstall_enter(&pflags);
 	__fs_reclaim_acquire();
 
@@ -3664,6 +3687,8 @@ out:
 	snapshot_refaults(NULL, pgdat);
 	__fs_reclaim_release();
 	psi_memstall_leave(&pflags);
+	set_task_reclaim_state(current, NULL);
+
 	/*
 	 * Return the order kswapd stopped reclaiming at as
 	 * prepare_kswapd_sleep() takes it into account. If another caller
@@ -3787,15 +3812,10 @@ static int kswapd(void *p)
 	unsigned int classzone_idx = MAX_NR_ZONES - 1;
 	pg_data_t *pgdat = (pg_data_t*)p;
 	struct task_struct *tsk = current;
-
-	struct reclaim_state reclaim_state = {
-		.reclaimed_slab = 0,
-	};
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 
 	if (!cpumask_empty(cpumask))
 		set_cpus_allowed_ptr(tsk, cpumask);
-	current->reclaim_state = &reclaim_state;
 
 	/*
 	 * Tell the memory management that we're a "memory allocator",
@@ -3857,7 +3877,6 @@ kswapd_try_sleep:
 	}
 
 	tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
-	current->reclaim_state = NULL;
 
 	return 0;
 }
@@ -3922,7 +3941,6 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
  */
 unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 {
-	struct reclaim_state reclaim_state;
 	struct scan_control sc = {
 		.nr_to_reclaim = nr_to_reclaim,
 		.gfp_mask = GFP_HIGHUSER_MOVABLE,
@@ -3934,18 +3952,16 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 		.hibernation_mode = 1,
 	};
 	struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
-	struct task_struct *p = current;
 	unsigned long nr_reclaimed;
 	unsigned int noreclaim_flag;
 
 	fs_reclaim_acquire(sc.gfp_mask);
 	noreclaim_flag = memalloc_noreclaim_save();
-	reclaim_state.reclaimed_slab = 0;
-	p->reclaim_state = &reclaim_state;
+	set_task_reclaim_state(current, &sc.reclaim_state);
 
 	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
-	p->reclaim_state = NULL;
+	set_task_reclaim_state(current, NULL);
 	memalloc_noreclaim_restore(noreclaim_flag);
 	fs_reclaim_release(sc.gfp_mask);
 
@@ -4110,7 +4126,6 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 	/* Minimum pages needed in order to stay on node */
 	const unsigned long nr_pages = 1 << order;
 	struct task_struct *p = current;
-	struct reclaim_state reclaim_state;
 	unsigned int noreclaim_flag;
 	struct scan_control sc = {
 		.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
@@ -4135,8 +4150,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 	 */
 	noreclaim_flag = memalloc_noreclaim_save();
 	p->flags |= PF_SWAPWRITE;
-	reclaim_state.reclaimed_slab = 0;
-	p->reclaim_state = &reclaim_state;
+	set_task_reclaim_state(p, &sc.reclaim_state);
 
 	if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
 		/*
@@ -4148,7 +4162,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 	} while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
 	}
 
-	p->reclaim_state = NULL;
+	set_task_reclaim_state(p, NULL);
 	current->flags &= ~PF_SWAPWRITE;
 	memalloc_noreclaim_restore(noreclaim_flag);
 	fs_reclaim_release(sc.gfp_mask);
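Reviewer note, not part of the patch: with reclaim_state now embedded in struct scan_control, every reclaim entry point above follows the same bracketing convention. A skeleton of that pattern, as it would appear inside mm/vmscan.c (example_reclaim_path() is an invented name; set_task_reclaim_state() and the reclaim_state member are the ones added above):

static void example_reclaim_path(struct scan_control *sc)
{
	/* publish sc->reclaim_state so freed slab is credited to this task */
	set_task_reclaim_state(current, &sc->reclaim_state);

	/* ... do the actual reclaim work (do_try_to_free_pages() etc.) ... */

	/* always clear it on the same task before returning */
	set_task_reclaim_state(current, NULL);
}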
diff --git a/mm/z3fold.c b/mm/z3fold.c
index dfcd69d08c1e..6c72b18d8b9c 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -101,6 +101,7 @@ struct z3fold_buddy_slots {
 * @refcount:		reference count for the z3fold page
 * @work:		work_struct for page layout optimization
 * @slots:		pointer to the structure holding buddy slots
+* @pool:		pointer to the containing pool
 * @cpu:		CPU which this page "belongs" to
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
@@ -114,6 +115,7 @@ struct z3fold_header {
 	struct kref refcount;
 	struct work_struct work;
 	struct z3fold_buddy_slots *slots;
+	struct z3fold_pool *pool;
 	short cpu;
 	unsigned short first_chunks;
 	unsigned short middle_chunks;
@@ -193,8 +195,10 @@ static void compact_page_work(struct work_struct *w);
 static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
 							gfp_t gfp)
 {
-	struct z3fold_buddy_slots *slots = kmem_cache_alloc(pool->c_handle,
-							    gfp);
+	struct z3fold_buddy_slots *slots;
+
+	slots = kmem_cache_alloc(pool->c_handle,
+				 (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE)));
 
 	if (slots) {
 		memset(slots->slot, 0, sizeof(slots->slot));
@@ -320,6 +324,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
 	zhdr->start_middle = 0;
 	zhdr->cpu = -1;
 	zhdr->slots = slots;
+	zhdr->pool = pool;
 	INIT_LIST_HEAD(&zhdr->buddy);
 	INIT_WORK(&zhdr->work, compact_page_work);
 	return zhdr;
@@ -426,7 +431,7 @@ static enum buddy handle_to_buddy(unsigned long handle)
 
 static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
 {
-	return slots_to_pool(zhdr->slots);
+	return zhdr->pool;
 }
 
 static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
@@ -850,7 +855,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 	enum buddy bud;
 	bool can_sleep = gfpflags_allow_blocking(gfp);
 
-	if (!size || (gfp & __GFP_HIGHMEM))
+	if (!size)
 		return -EINVAL;
 
 	if (size > PAGE_SIZE)
@@ -1345,24 +1350,29 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
 	zhdr = page_address(page);
 	pool = zhdr_to_pool(zhdr);
 
-	if (!trylock_page(page))
-		return -EAGAIN;
-
 	if (!z3fold_page_trylock(zhdr)) {
-		unlock_page(page);
 		return -EAGAIN;
 	}
 	if (zhdr->mapped_count != 0) {
 		z3fold_page_unlock(zhdr);
-		unlock_page(page);
 		return -EBUSY;
 	}
+	if (work_pending(&zhdr->work)) {
+		z3fold_page_unlock(zhdr);
+		return -EAGAIN;
+	}
 	new_zhdr = page_address(newpage);
 	memcpy(new_zhdr, zhdr, PAGE_SIZE);
 	newpage->private = page->private;
 	page->private = 0;
 	z3fold_page_unlock(zhdr);
 	spin_lock_init(&new_zhdr->page_lock);
+	INIT_WORK(&new_zhdr->work, compact_page_work);
+	/*
+	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
+	 * so we only have to reinitialize it.
+	 */
+	INIT_LIST_HEAD(&new_zhdr->buddy);
 	new_mapping = page_mapping(page);
 	__ClearPageMovable(page);
 	ClearPagePrivate(page);
@@ -1386,7 +1396,6 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
 	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
 	page_mapcount_reset(page);
-	unlock_page(page);
 	put_page(page);
 	return 0;
 }