-rw-r--r--  arch/arm64/include/asm/elf.h          |  4
-rw-r--r--  arch/powerpc/Kconfig                  |  2
-rw-r--r--  arch/x86/Kconfig                      |  2
-rw-r--r--  arch/x86/include/asm/elf.h            |  4
-rw-r--r--  include/linux/memblock.h              |  6
-rw-r--r--  include/linux/memcontrol.h            | 10
-rw-r--r--  include/linux/oom.h                   | 22
-rw-r--r--  include/linux/wait.h                  | 37
-rw-r--r--  kernel/kmod.c                         | 25
-rw-r--r--  kernel/signal.c                       |  6
-rw-r--r--  mm/cma_debug.c                        |  2
-rw-r--r--  mm/huge_memory.c                      | 30
-rw-r--r--  mm/memblock.c                         | 38
-rw-r--r--  mm/memcontrol.c                       | 43
-rw-r--r--  mm/memory.c                           | 36
-rw-r--r--  mm/mempolicy.c                        |  5
-rw-r--r--  mm/nobootmem.c                        | 16
-rw-r--r--  mm/page-writeback.c                   | 15
-rw-r--r--  mm/page_alloc.c                       |  4
-rw-r--r--  mm/slub.c                             |  3
-rw-r--r--  mm/vmalloc.c                          | 13
-rwxr-xr-x  tools/testing/selftests/kmod/kmod.sh  |  4
22 files changed, 224 insertions, 103 deletions
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index acae781f7359..3288c2b36731 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -114,10 +114,10 @@
 
 /*
  * This is the base location for PIE (ET_DYN with INTERP) loads. On
- * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * 64-bit, this is above 4GB to leave the entire 32-bit address
  * space open for things that want to use the area for 32-bit pointers.
  */
-#define ELF_ET_DYN_BASE		0x100000000UL
+#define ELF_ET_DYN_BASE		(2 * TASK_SIZE_64 / 3)
 
 #ifndef __ASSEMBLY__
 
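
For a rough feel of the new base, here is a standalone userspace sketch of the
arithmetic (assuming a 48-bit user VA span, e.g. CONFIG_ARM64_VA_BITS=48; the
patch itself only defines the macro, this program is illustration only):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assumed VA span; the real TASK_SIZE_64 depends on kernel config */
	uint64_t task_size_64 = 1ULL << 48;
	/* Same arithmetic as the new ELF_ET_DYN_BASE definition above */
	uint64_t base = 2 * task_size_64 / 3;

	/* Prints 0xaaaaaaaaaaaa, comfortably above the 4GB boundary */
	printf("ELF_ET_DYN_BASE = %#llx\n", (unsigned long long)base);
	return 0;
}
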
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 36f858c37ca7..81b0031f909f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -199,7 +199,7 @@ config PPC
 	select HAVE_OPTPROBES			if PPC64
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI		if PPC64
-	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE		if SMP
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 781521b7cf9e..29a1bf85e507 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -163,7 +163,7 @@ config X86
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
-	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1c18d83d3f09..9aeb91935ce0 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -247,11 +247,11 @@ extern int force_personality32;
 
 /*
  * This is the base location for PIE (ET_DYN with INTERP) loads. On
- * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * 64-bit, this is above 4GB to leave the entire 32-bit address
  * space open for things that want to use the area for 32-bit pointers.
  */
 #define ELF_ET_DYN_BASE		(mmap_is_ia32() ? 0x000400000UL : \
-						  0x100000000UL)
+						  (TASK_SIZE / 3 * 2))
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports. This could be done in user space,
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 77d427974f57..bae11c7e7bf3 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -61,6 +61,7 @@ extern int memblock_debug;
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
 #define __init_memblock __meminit
 #define __initdata_memblock __meminitdata
+void memblock_discard(void);
 #else
 #define __init_memblock
 #define __initdata_memblock
@@ -74,8 +75,6 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
 			int nid, ulong flags);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
-phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
-phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr);
 void memblock_allow_resize(void);
 int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
 int memblock_add(phys_addr_t base, phys_addr_t size);
@@ -110,6 +109,9 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags,
 void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
 				phys_addr_t *out_end);
 
+void __memblock_free_early(phys_addr_t base, phys_addr_t size);
+void __memblock_free_late(phys_addr_t base, phys_addr_t size);
+
 /**
  * for_each_mem_range - iterate through memblock areas from type_a and not
  * included in type_b. Or just type_a if type_b is NULL.
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3914e3dd6168..9b15a4bcfa77 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -484,7 +484,8 @@ bool mem_cgroup_oom_synchronize(bool wait);
 extern int do_swap_account;
 #endif
 
-void lock_page_memcg(struct page *page);
+struct mem_cgroup *lock_page_memcg(struct page *page);
+void __unlock_page_memcg(struct mem_cgroup *memcg);
 void unlock_page_memcg(struct page *page);
 
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
@@ -809,7 +810,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
 }
 
-static inline void lock_page_memcg(struct page *page)
+static inline struct mem_cgroup *lock_page_memcg(struct page *page)
+{
+	return NULL;
+}
+
+static inline void __unlock_page_memcg(struct mem_cgroup *memcg)
 {
 }
 
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 8a266e2be5a6..76aac4ce39bc 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -6,6 +6,8 @@
 #include <linux/types.h>
 #include <linux/nodemask.h>
 #include <uapi/linux/oom.h>
+#include <linux/sched/coredump.h> /* MMF_* */
+#include <linux/mm.h> /* VM_FAULT* */
 
 struct zonelist;
 struct notifier_block;
@@ -63,6 +65,26 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk)
 	return tsk->signal->oom_mm;
 }
 
+/*
+ * Checks whether a page fault on the given mm is still reliable.
+ * This is no longer true if the oom reaper started to reap the
+ * address space which is reflected by MMF_UNSTABLE flag set in
+ * the mm. At that moment any !shared mapping would lose the content
+ * and could cause a memory corruption (zero pages instead of the
+ * original content).
+ *
+ * User should call this before establishing a page table entry for
+ * a !shared mapping and under the proper page table lock.
+ *
+ * Return 0 when the PF is safe VM_FAULT_SIGBUS otherwise.
+ */
+static inline int check_stable_address_space(struct mm_struct *mm)
+{
+	if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags)))
+		return VM_FAULT_SIGBUS;
+	return 0;
+}
+
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 5b74e36c0ca8..dc19880c02f5 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -757,6 +757,43 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
 	__ret;								\
 })
 
+#define __wait_event_killable_timeout(wq_head, condition, timeout)	\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),		\
+		      TASK_KILLABLE, 0, timeout,			\
+		      __ret = schedule_timeout(__ret))
+
+/**
+ * wait_event_killable_timeout - sleep until a condition gets true or a timeout elapses
+ * @wq_head: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout, in jiffies
+ *
+ * The process is put to sleep (TASK_KILLABLE) until the
+ * @condition evaluates to true or a kill signal is received.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * Returns:
+ * 0 if the @condition evaluated to %false after the @timeout elapsed,
+ * 1 if the @condition evaluated to %true after the @timeout elapsed,
+ * the remaining jiffies (at least 1) if the @condition evaluated
+ * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was
+ * interrupted by a kill signal.
+ *
+ * Only kill signals interrupt this process.
+ */
+#define wait_event_killable_timeout(wq_head, condition, timeout)	\
+({									\
+	long __ret = timeout;						\
+	might_sleep();							\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __wait_event_killable_timeout(wq_head,		\
+						condition, timeout);	\
+	__ret;								\
+})
+
 
 #define __wait_event_lock_irq(wq_head, condition, lock, cmd)		\
 	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
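
A minimal caller sketch for the new macro (my_wq and done are hypothetical;
kernel/kmod.c below is the first real user):

static DECLARE_WAIT_QUEUE_HEAD(my_wq);	/* hypothetical wait queue */
static bool done;			/* hypothetical condition */

static int wait_for_done(void)
{
	long ret = wait_event_killable_timeout(my_wq, done, 5 * HZ);

	if (!ret)			/* timed out, condition still false */
		return -ETIME;
	if (ret == -ERESTARTSYS)	/* woken by SIGKILL */
		return ret;
	return 0;			/* condition became true in time */
}
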
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 6d016c5d97c8..2f37acde640b 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -71,6 +71,18 @@ static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
 static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
 
 /*
+ * This is a restriction on having *all* MAX_KMOD_CONCURRENT threads
+ * running at the same time without returning. When this happens we
+ * believe you've somehow ended up with a recursive module dependency
+ * creating a loop.
+ *
+ * We have no option but to fail.
+ *
+ * Userspace should proactively try to detect and prevent these.
+ */
+#define MAX_KMOD_ALL_BUSY_TIMEOUT 5
+
+/*
 	modprobe_path is set via /proc/sys.
 */
 char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
@@ -167,8 +179,17 @@ int __request_module(bool wait, const char *fmt, ...)
 		pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
 				    atomic_read(&kmod_concurrent_max),
 				    MAX_KMOD_CONCURRENT, module_name);
-		wait_event_interruptible(kmod_wq,
-					 atomic_dec_if_positive(&kmod_concurrent_max) >= 0);
+		ret = wait_event_killable_timeout(kmod_wq,
+						  atomic_dec_if_positive(&kmod_concurrent_max) >= 0,
+						  MAX_KMOD_ALL_BUSY_TIMEOUT * HZ);
+		if (!ret) {
+			pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now",
+					    module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT);
+			return -ETIME;
+		} else if (ret == -ERESTARTSYS) {
+			pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name);
+			return ret;
+		}
 	}
 
 	trace_module_request(module_name, wait, _RET_IP_);
diff --git a/kernel/signal.c b/kernel/signal.c
index 7e33f8c583e6..ed804a470dcd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1194,7 +1194,11 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 			recalc_sigpending_and_wake(t);
 		}
 	}
-	if (action->sa.sa_handler == SIG_DFL)
+	/*
+	 * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect
+	 * debugging to leave init killable.
+	 */
+	if (action->sa.sa_handler == SIG_DFL && !t->ptrace)
 		t->signal->flags &= ~SIGNAL_UNKILLABLE;
 	ret = specific_send_sig_info(sig, info, t);
 	spin_unlock_irqrestore(&t->sighand->siglock, flags);
diff --git a/mm/cma_debug.c b/mm/cma_debug.c
index 595b757bef72..c03ccbc405a0 100644
--- a/mm/cma_debug.c
+++ b/mm/cma_debug.c
@@ -167,7 +167,7 @@ static void cma_debugfs_add_one(struct cma *cma, int idx)
 	char name[16];
 	int u32s;
 
-	sprintf(name, "cma-%s", cma->name);
+	scnprintf(name, sizeof(name), "cma-%s", cma->name);
 
 	tmp = debugfs_create_dir(name, cma_debugfs_root);
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 216114f6ef0b..90731e3b7e58 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -32,6 +32,7 @@
 #include <linux/userfaultfd_k.h>
 #include <linux/page_idle.h>
 #include <linux/shmem_fs.h>
+#include <linux/oom.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -550,6 +551,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+	int ret = 0;
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
@@ -561,9 +563,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	pgtable = pte_alloc_one(vma->vm_mm, haddr);
 	if (unlikely(!pgtable)) {
-		mem_cgroup_cancel_charge(page, memcg, true);
-		put_page(page);
-		return VM_FAULT_OOM;
+		ret = VM_FAULT_OOM;
+		goto release;
 	}
 
 	clear_huge_page(page, haddr, HPAGE_PMD_NR);
@@ -576,13 +577,14 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
 	if (unlikely(!pmd_none(*vmf->pmd))) {
-		spin_unlock(vmf->ptl);
-		mem_cgroup_cancel_charge(page, memcg, true);
-		put_page(page);
-		pte_free(vma->vm_mm, pgtable);
+		goto unlock_release;
 	} else {
 		pmd_t entry;
 
+		ret = check_stable_address_space(vma->vm_mm);
+		if (ret)
+			goto unlock_release;
+
 		/* Deliver the page fault to userland */
 		if (userfaultfd_missing(vma)) {
 			int ret;
@@ -610,6 +612,15 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 	}
 
 	return 0;
+unlock_release:
+	spin_unlock(vmf->ptl);
+release:
+	if (pgtable)
+		pte_free(vma->vm_mm, pgtable);
+	mem_cgroup_cancel_charge(page, memcg, true);
+	put_page(page);
+	return ret;
+
 }
 
 /*
@@ -688,7 +699,10 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		ret = 0;
 		set = false;
 		if (pmd_none(*vmf->pmd)) {
-			if (userfaultfd_missing(vma)) {
+			ret = check_stable_address_space(vma->vm_mm);
+			if (ret) {
+				spin_unlock(vmf->ptl);
+			} else if (userfaultfd_missing(vma)) {
 				spin_unlock(vmf->ptl);
 				ret = handle_userfault(vmf, VM_UFFD_MISSING);
 				VM_BUG_ON(ret & VM_FAULT_FALLBACK);
diff --git a/mm/memblock.c b/mm/memblock.c
index 2cb25fe4452c..bf14aea6ab70 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -285,31 +285,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 }
 
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
-
-phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
-					phys_addr_t *addr)
-{
-	if (memblock.reserved.regions == memblock_reserved_init_regions)
-		return 0;
-
-	*addr = __pa(memblock.reserved.regions);
-
-	return PAGE_ALIGN(sizeof(struct memblock_region) *
-			  memblock.reserved.max);
-}
-
-phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info(
-					phys_addr_t *addr)
-{
-	if (memblock.memory.regions == memblock_memory_init_regions)
-		return 0;
-
-	*addr = __pa(memblock.memory.regions);
-
-	return PAGE_ALIGN(sizeof(struct memblock_region) *
-			  memblock.memory.max);
-}
-
+/**
+ * Discard memory and reserved arrays if they were allocated
+ */
+void __init memblock_discard(void)
+{
+	phys_addr_t addr, size;
+
+	if (memblock.reserved.regions != memblock_reserved_init_regions) {
+		addr = __pa(memblock.reserved.regions);
+		size = PAGE_ALIGN(sizeof(struct memblock_region) *
+				  memblock.reserved.max);
+		__memblock_free_late(addr, size);
+	}
+
+	if (memblock.memory.regions == memblock_memory_init_regions) {
+		addr = __pa(memblock.memory.regions);
+		size = PAGE_ALIGN(sizeof(struct memblock_region) *
+				  memblock.memory.max);
+		__memblock_free_late(addr, size);
+	}
+}
 #endif
 
 /**
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3df3c04d73ab..e09741af816f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1611,9 +1611,13 @@ cleanup:
  * @page: the page
  *
  * This function protects unlocked LRU pages from being moved to
- * another cgroup and stabilizes their page->mem_cgroup binding.
+ * another cgroup.
+ *
+ * It ensures lifetime of the returned memcg. Caller is responsible
+ * for the lifetime of the page; __unlock_page_memcg() is available
+ * when @page might get freed inside the locked section.
  */
-void lock_page_memcg(struct page *page)
+struct mem_cgroup *lock_page_memcg(struct page *page)
 {
 	struct mem_cgroup *memcg;
 	unsigned long flags;
@@ -1622,18 +1626,24 @@ void lock_page_memcg(struct page *page)
 	 * The RCU lock is held throughout the transaction. The fast
 	 * path can get away without acquiring the memcg->move_lock
 	 * because page moving starts with an RCU grace period.
-	 */
+	 *
+	 * The RCU lock also protects the memcg from being freed when
+	 * the page state that is going to change is the only thing
+	 * preventing the page itself from being freed. E.g. writeback
+	 * doesn't hold a page reference and relies on PG_writeback to
+	 * keep off truncation, migration and so forth.
+	 */
 	rcu_read_lock();
 
 	if (mem_cgroup_disabled())
-		return;
+		return NULL;
 again:
 	memcg = page->mem_cgroup;
 	if (unlikely(!memcg))
-		return;
+		return NULL;
 
 	if (atomic_read(&memcg->moving_account) <= 0)
-		return;
+		return memcg;
 
 	spin_lock_irqsave(&memcg->move_lock, flags);
 	if (memcg != page->mem_cgroup) {
@@ -1649,18 +1659,18 @@ again:
 	memcg->move_lock_task = current;
 	memcg->move_lock_flags = flags;
 
-	return;
+	return memcg;
 }
 EXPORT_SYMBOL(lock_page_memcg);
 
 /**
- * unlock_page_memcg - unlock a page->mem_cgroup binding
- * @page: the page
+ * __unlock_page_memcg - unlock and unpin a memcg
+ * @memcg: the memcg
+ *
+ * Unlock and unpin a memcg returned by lock_page_memcg().
  */
-void unlock_page_memcg(struct page *page)
+void __unlock_page_memcg(struct mem_cgroup *memcg)
 {
-	struct mem_cgroup *memcg = page->mem_cgroup;
-
 	if (memcg && memcg->move_lock_task == current) {
 		unsigned long flags = memcg->move_lock_flags;
 
@@ -1672,6 +1682,15 @@ void unlock_page_memcg(struct page *page)
 
 	rcu_read_unlock();
 }
+
+/**
+ * unlock_page_memcg - unlock a page->mem_cgroup binding
+ * @page: the page
+ */
+void unlock_page_memcg(struct page *page)
+{
+	__unlock_page_memcg(page->mem_cgroup);
+}
 EXPORT_SYMBOL(unlock_page_memcg);
 
 /*
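
The resulting convention, sketched (clear_some_page_bit() is a hypothetical
stand-in; the mm/page-writeback.c hunks below are the real conversion):

static void update_stats_page_may_free(struct page *page)
{
	struct mem_cgroup *memcg;

	memcg = lock_page_memcg(page);
	clear_some_page_bit(page);	/* the page may be freed after this */
	/* page->mem_cgroup must not be touched anymore, but memcg is
	 * still pinned by the RCU lock held since lock_page_memcg() */
	__unlock_page_memcg(memcg);
}
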
diff --git a/mm/memory.c b/mm/memory.c
index e158f7ac6730..fe2fba27ded2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -68,6 +68,7 @@
 #include <linux/debugfs.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/dax.h>
+#include <linux/oom.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -2893,6 +2894,7 @@ static int do_anonymous_page(struct vm_fault *vmf)
 	struct vm_area_struct *vma = vmf->vma;
 	struct mem_cgroup *memcg;
 	struct page *page;
+	int ret = 0;
 	pte_t entry;
 
 	/* File mapping without ->vm_ops ? */
@@ -2925,6 +2927,9 @@
 			vmf->address, &vmf->ptl);
 		if (!pte_none(*vmf->pte))
 			goto unlock;
+		ret = check_stable_address_space(vma->vm_mm);
+		if (ret)
+			goto unlock;
 		/* Deliver the page fault to userland, check inside PT lock */
 		if (userfaultfd_missing(vma)) {
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2959,6 +2964,10 @@
 	if (!pte_none(*vmf->pte))
 		goto release;
 
+	ret = check_stable_address_space(vma->vm_mm);
+	if (ret)
+		goto release;
+
 	/* Deliver the page fault to userland, check inside PT lock */
 	if (userfaultfd_missing(vma)) {
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2978,7 +2987,7 @@ setpte:
 	update_mmu_cache(vma, vmf->address, vmf->pte);
unlock:
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
-	return 0;
+	return ret;
release:
 	mem_cgroup_cancel_charge(page, memcg, false);
 	put_page(page);
@@ -3252,7 +3261,7 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 int finish_fault(struct vm_fault *vmf)
 {
 	struct page *page;
-	int ret;
+	int ret = 0;
 
 	/* Did we COW the page? */
 	if ((vmf->flags & FAULT_FLAG_WRITE) &&
@@ -3260,7 +3269,15 @@ int finish_fault(struct vm_fault *vmf)
 		page = vmf->cow_page;
 	else
 		page = vmf->page;
-	ret = alloc_set_pte(vmf, vmf->memcg, page);
+
+	/*
+	 * check even for read faults because we might have lost our CoWed
+	 * page
+	 */
+	if (!(vmf->vma->vm_flags & VM_SHARED))
+		ret = check_stable_address_space(vmf->vma->vm_mm);
+	if (!ret)
+		ret = alloc_set_pte(vmf, vmf->memcg, page);
 	if (vmf->pte)
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 	return ret;
@@ -3900,19 +3917,6 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		mem_cgroup_oom_synchronize(false);
 	}
 
-	/*
-	 * This mm has been already reaped by the oom reaper and so the
-	 * refault cannot be trusted in general. Anonymous refaults would
-	 * lose data and give a zero page instead e.g. This is especially
-	 * problem for use_mm() because regular tasks will just die and
-	 * the corrupted data will not be visible anywhere while kthread
-	 * will outlive the oom victim and potentially propagate the date
-	 * further.
-	 */
-	if (unlikely((current->flags & PF_KTHREAD) && !(ret & VM_FAULT_ERROR)
-				&& test_bit(MMF_UNSTABLE, &vma->vm_mm->flags)))
-		ret = VM_FAULT_SIGBUS;
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(handle_mm_fault);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d911fa5cb2a7..618ab125228b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -861,11 +861,6 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 		*policy |= (pol->flags & MPOL_MODE_FLAGS);
 	}
 
-	if (vma) {
-		up_read(&current->mm->mmap_sem);
-		vma = NULL;
-	}
-
 	err = 0;
 	if (nmask) {
 		if (mpol_store_user_nodemask(pol)) {
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 36454d0f96ee..3637809a18d0 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -146,22 +146,6 @@ static unsigned long __init free_low_memory_core_early(void)
 				NULL)
 		count += __free_memory_core(start, end);
 
-#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
-	{
-		phys_addr_t size;
-
-		/* Free memblock.reserved array if it was allocated */
-		size = get_allocated_memblock_reserved_regions_info(&start);
-		if (size)
-			count += __free_memory_core(start, start + size);
-
-		/* Free memblock.memory array if it was allocated */
-		size = get_allocated_memblock_memory_regions_info(&start);
-		if (size)
-			count += __free_memory_core(start, start + size);
-	}
-#endif
-
 	return count;
 }
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 96e93b214d31..bf050ab025b7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2724,9 +2724,12 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
 int test_clear_page_writeback(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
 	int ret;
 
-	lock_page_memcg(page);
+	memcg = lock_page_memcg(page);
+	lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
 	if (mapping && mapping_use_writeback_tags(mapping)) {
 		struct inode *inode = mapping->host;
 		struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -2754,12 +2757,18 @@
 	} else {
 		ret = TestClearPageWriteback(page);
 	}
+	/*
+	 * NOTE: Page might be free now! Writeback doesn't hold a page
+	 * reference on its own, it relies on truncation to wait for
+	 * the clearing of PG_writeback. The below can only access
+	 * page state that is static across allocation cycles.
+	 */
 	if (ret) {
-		dec_lruvec_page_state(page, NR_WRITEBACK);
+		dec_lruvec_state(lruvec, NR_WRITEBACK);
 		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		inc_node_page_state(page, NR_WRITTEN);
 	}
-	unlock_page_memcg(page);
+	__unlock_page_memcg(memcg);
 	return ret;
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6d00f746c2fd..1bad301820c7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1584,6 +1584,10 @@ void __init page_alloc_init_late(void)
 	/* Reinit limits that are based on free pages after the kernel is up */
 	files_maxfiles_init();
 #endif
+#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
+	/* Discard memblock private memory */
+	memblock_discard();
+#endif
 
 	for_each_populated_zone(zone)
 		set_zone_contiguous(zone);
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5642,13 +5642,14 @@ static void sysfs_slab_remove_workfn(struct work_struct *work)
 	 * A cache is never shut down before deactivation is
 	 * complete, so no need to worry about synchronization.
 	 */
-	return;
+	goto out;
 
 #ifdef CONFIG_MEMCG
 	kset_unregister(s->memcg_kset);
 #endif
 	kobject_uevent(&s->kobj, KOBJ_REMOVE);
 	kobject_del(&s->kobj);
+out:
 	kobject_put(&s->kobj);
 }
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 8698c1c86c4d..a47e3894c775 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1671,7 +1671,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	struct page **pages;
 	unsigned int nr_pages, array_size, i;
 	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
-	const gfp_t alloc_mask = gfp_mask | __GFP_HIGHMEM | __GFP_NOWARN;
+	const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
+	const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ?
+					0 :
+					__GFP_HIGHMEM;
 
 	nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
 	array_size = (nr_pages * sizeof(struct page *));
@@ -1679,7 +1682,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	area->nr_pages = nr_pages;
 	/* Please note that the recursion is strictly bounded. */
 	if (array_size > PAGE_SIZE) {
-		pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
+		pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
 				PAGE_KERNEL, node, area->caller);
 	} else {
 		pages = kmalloc_node(array_size, nested_gfp, node);
@@ -1700,9 +1703,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		}
 
 		if (node == NUMA_NO_NODE)
-			page = alloc_page(alloc_mask);
+			page = alloc_page(alloc_mask|highmem_mask);
 		else
-			page = alloc_pages_node(node, alloc_mask, 0);
+			page = alloc_pages_node(node, alloc_mask|highmem_mask, 0);
 
 		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vunmap() */
@@ -1710,7 +1713,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			goto fail;
 		}
 		area->pages[i] = page;
-		if (gfpflags_allow_blocking(gfp_mask))
+		if (gfpflags_allow_blocking(gfp_mask|highmem_mask))
 			cond_resched();
 	}
 
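
The split keeps __GFP_HIGHMEM out of DMA allocations; the same selection
logic as a helper, for illustration (an equivalent rewrite of the new
ternary, not code from the patch):

static gfp_t vmalloc_highmem_mask(gfp_t gfp_mask)
{
	/* GFP_DMA/GFP_DMA32 pages must come from their zone, never highmem */
	if (gfp_mask & (GFP_DMA | GFP_DMA32))
		return 0;
	return __GFP_HIGHMEM;
}
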
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index 8cecae9a8bca..7956ea3be667 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -473,8 +473,8 @@ usage()
 	echo "    all     Runs all tests (default)"
 	echo "    -t      Run test ID the number amount of times is recommended"
 	echo "    -w      Watch test ID run until it runs into an error"
-	echo "    -c      Run test ID once"
-	echo "    -s      Run test ID x test-count number of times"
+	echo "    -s      Run test ID once"
+	echo "    -c      Run test ID x test-count number of times"
 	echo "    -l      List all test ID list"
 	echo "    -h|--help Help"
 	echo
