author    Corey Minyard <cminyard@mvista.com>  2017-11-02 12:19:15 -0400
committer Corey Minyard <cminyard@mvista.com>  2017-11-02 12:19:15 -0400
commit    6297fabd93f93182245383ba7de56bef829a796b (patch)
tree      804f5d28ada61b402d56281c9a047308d26347f4 /mm
parent    d7e17fe4f7a7d961cc4375c7d868bd353a039bc7 (diff)
parent    ece1996a21eeb344b49200e627c6660111009c10 (diff)
Merge branch 'modules-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux into for-next
The IPMI SI driver was split into different pieces; merge the module tree to account for that.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Diffstat (limited to 'mm')
-rw-r--r--  mm/cma.c              2
-rw-r--r--  mm/compaction.c      13
-rw-r--r--  mm/filemap.c          8
-rw-r--r--  mm/ksm.c              5
-rw-r--r--  mm/list_lru.c        12
-rw-r--r--  mm/madvise.c         19
-rw-r--r--  mm/memcontrol.c      23
-rw-r--r--  mm/memory.c           2
-rw-r--r--  mm/memory_hotplug.c   7
-rw-r--r--  mm/mempolicy.c        7
-rw-r--r--  mm/migrate.c          3
-rw-r--r--  mm/oom_kill.c        16
-rw-r--r--  mm/page_alloc.c       3
-rw-r--r--  mm/page_vma_mapped.c 28
-rw-r--r--  mm/percpu-stats.c     2
-rw-r--r--  mm/percpu.c           4
-rw-r--r--  mm/rodata_test.c      2
-rw-r--r--  mm/slab_common.c     22
-rw-r--r--  mm/swap.c             4
-rw-r--r--  mm/swap_state.c      52
-rw-r--r--  mm/vmalloc.c          6
-rw-r--r--  mm/z3fold.c          10
22 files changed, 144 insertions, 106 deletions
diff --git a/mm/cma.c b/mm/cma.c
index c0da318c020e..022e52bd8370 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -460,7 +460,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
 
 	trace_cma_alloc(pfn, page, count, align);
 
-	if (ret) {
+	if (ret && !(gfp_mask & __GFP_NOWARN)) {
 		pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n",
 			__func__, count, ret);
 		cma_debug_show_areas(cma);
diff --git a/mm/compaction.c b/mm/compaction.c
index fb548e4c7bd4..03d31a875341 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1999,17 +1999,14 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
 	if (pgdat->kcompactd_max_order < order)
 		pgdat->kcompactd_max_order = order;
 
-	/*
-	 * Pairs with implicit barrier in wait_event_freezable()
-	 * such that wakeups are not missed in the lockless
-	 * waitqueue_active() call.
-	 */
-	smp_acquire__after_ctrl_dep();
-
 	if (pgdat->kcompactd_classzone_idx > classzone_idx)
 		pgdat->kcompactd_classzone_idx = classzone_idx;
 
-	if (!waitqueue_active(&pgdat->kcompactd_wait))
+	/*
+	 * Pairs with implicit barrier in wait_event_freezable()
+	 * such that wakeups are not missed.
+	 */
+	if (!wq_has_sleeper(&pgdat->kcompactd_wait))
 		return;
 
 	if (!kcompactd_node_suitable(pgdat))
diff --git a/mm/filemap.c b/mm/filemap.c
index db250d0e0565..594d73fef8b4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -620,6 +620,14 @@ int file_check_and_advance_wb_err(struct file *file)
 		trace_file_check_and_advance_wb_err(file, old);
 		spin_unlock(&file->f_lock);
 	}
+
+	/*
+	 * We're mostly using this function as a drop in replacement for
+	 * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect
+	 * that the legacy code would have had on these flags.
+	 */
+	clear_bit(AS_EIO, &mapping->flags);
+	clear_bit(AS_ENOSPC, &mapping->flags);
 	return err;
 }
 EXPORT_SYMBOL(file_check_and_advance_wb_err);
diff --git a/mm/ksm.c b/mm/ksm.c
index 15dd7415f7b3..6cb60f46cce5 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1990,6 +1990,7 @@ static void stable_tree_append(struct rmap_item *rmap_item,
  */
 static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 {
+	struct mm_struct *mm = rmap_item->mm;
 	struct rmap_item *tree_rmap_item;
 	struct page *tree_page = NULL;
 	struct stable_node *stable_node;
@@ -2062,9 +2063,11 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	if (ksm_use_zero_pages && (checksum == zero_checksum)) {
 		struct vm_area_struct *vma;
 
-		vma = find_mergeable_vma(rmap_item->mm, rmap_item->address);
+		down_read(&mm->mmap_sem);
+		vma = find_mergeable_vma(mm, rmap_item->address);
 		err = try_to_merge_one_page(vma, page,
 					ZERO_PAGE(rmap_item->address));
+		up_read(&mm->mmap_sem);
 		/*
 		 * In case of failure, the page was not really empty, so we
 		 * need to continue. Otherwise we're done.
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 7a40fa2be858..f141f0c80ff3 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -325,12 +325,12 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
 {
 	int size = memcg_nr_cache_ids;
 
-	nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
+	nlru->memcg_lrus = kvmalloc(size * sizeof(void *), GFP_KERNEL);
 	if (!nlru->memcg_lrus)
 		return -ENOMEM;
 
 	if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
-		kfree(nlru->memcg_lrus);
+		kvfree(nlru->memcg_lrus);
 		return -ENOMEM;
 	}
 
@@ -340,7 +340,7 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
 static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
 {
 	__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
-	kfree(nlru->memcg_lrus);
+	kvfree(nlru->memcg_lrus);
 }
 
 static int memcg_update_list_lru_node(struct list_lru_node *nlru,
@@ -351,12 +351,12 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
 	BUG_ON(old_size > new_size);
 
 	old = nlru->memcg_lrus;
-	new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
+	new = kvmalloc(new_size * sizeof(void *), GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
 
 	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
-		kfree(new);
+		kvfree(new);
 		return -ENOMEM;
 	}
 
@@ -373,7 +373,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
 	nlru->memcg_lrus = new;
 	spin_unlock_irq(&nlru->lock);
 
-	kfree(old);
+	kvfree(old);
 	return 0;
 }
 
diff --git a/mm/madvise.c b/mm/madvise.c
index 21261ff0466f..fd70d6aabc3e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -625,18 +625,26 @@ static int madvise_inject_error(int behavior,
 {
 	struct page *page;
 	struct zone *zone;
+	unsigned int order;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	for (; start < end; start += PAGE_SIZE <<
-				compound_order(compound_head(page))) {
+
+	for (; start < end; start += PAGE_SIZE << order) {
 		int ret;
 
 		ret = get_user_pages_fast(start, 1, 0, &page);
 		if (ret != 1)
 			return ret;
 
+		/*
+		 * When soft offlining hugepages, after migrating the page
+		 * we dissolve it, therefore in the second loop "page" will
+		 * no longer be a compound page, and order will be 0.
+		 */
+		order = compound_order(compound_head(page));
+
 		if (PageHWPoison(page)) {
 			put_page(page);
 			continue;
@@ -749,6 +757,9 @@ madvise_behavior_valid(int behavior)
  * MADV_DONTFORK - omit this area from child's address space when forking:
  *		typically, to avoid COWing pages pinned by get_user_pages().
  * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
+ * MADV_WIPEONFORK - present the child process with zero-filled memory in this
+ *		range after a fork.
+ * MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK
  * MADV_HWPOISON - trigger memory error handler as if the given memory range
  *		were corrupted by unrecoverable hardware memory failure.
  * MADV_SOFT_OFFLINE - try to soft-offline the given range of memory.
@@ -769,7 +780,9 @@ madvise_behavior_valid(int behavior)
  *  zero    - success
  *  -EINVAL - start + len < 0, start is not page-aligned,
  *		"behavior" is not a valid value, or application
- *		is attempting to release locked or shared pages.
+ *		is attempting to release locked or shared pages,
+ *		or the specified address range includes file, Huge TLB,
+ *		MAP_SHARED or VMPFNMAP range.
  *  -ENOMEM - addresses in the specified range are not currently
  *		mapped, or are outside the AS of the process.
  *  -EIO    - an I/O error occurred while paging in data.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 15af3da5af02..d5f3a62887cf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1777,6 +1777,10 @@ static void drain_local_stock(struct work_struct *dummy)
 	struct memcg_stock_pcp *stock;
 	unsigned long flags;
 
+	/*
+	 * The only protection from memory hotplug vs. drain_stock races is
+	 * that we always operate on local CPU stock here with IRQ disabled
+	 */
 	local_irq_save(flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
@@ -1821,27 +1825,33 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 	/* If someone's already draining, avoid adding running more workers. */
 	if (!mutex_trylock(&percpu_charge_mutex))
 		return;
-	/* Notify other cpus that system-wide "drain" is running */
-	get_online_cpus();
+	/*
+	 * Notify other cpus that system-wide "drain" is running
+	 * We do not care about races with the cpu hotplug because cpu down
+	 * as well as workers from this path always operate on the local
+	 * per-cpu data. CPU up doesn't touch memcg_stock at all.
+	 */
 	curcpu = get_cpu();
 	for_each_online_cpu(cpu) {
 		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
 		struct mem_cgroup *memcg;
 
 		memcg = stock->cached;
-		if (!memcg || !stock->nr_pages)
+		if (!memcg || !stock->nr_pages || !css_tryget(&memcg->css))
 			continue;
-		if (!mem_cgroup_is_descendant(memcg, root_memcg))
+		if (!mem_cgroup_is_descendant(memcg, root_memcg)) {
+			css_put(&memcg->css);
 			continue;
+		}
 		if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
 			if (cpu == curcpu)
 				drain_local_stock(&stock->work);
 			else
 				schedule_work_on(cpu, &stock->work);
 		}
+		css_put(&memcg->css);
 	}
 	put_cpu();
-	put_online_cpus();
 	mutex_unlock(&percpu_charge_mutex);
 }
 
@@ -5648,7 +5658,8 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 {
 	VM_BUG_ON_PAGE(PageLRU(page), page);
-	VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
+	VM_BUG_ON_PAGE(page_count(page) && !is_zone_device_page(page) &&
+			!PageHWPoison(page) , page);
 
 	if (!page->mem_cgroup)
 		return;
diff --git a/mm/memory.c b/mm/memory.c
index ec4e15494901..a728bed16c20 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -845,7 +845,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		 * vm_normal_page() so that we do not have to special case all
 		 * call site of vm_normal_page().
 		 */
-		if (likely(pfn < highest_memmap_pfn)) {
+		if (likely(pfn <= highest_memmap_pfn)) {
 			struct page *page = pfn_to_page(pfn);
 
 			if (is_device_public_page(page)) {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e882cb6da994..d4b5f29906b9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -328,6 +328,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 		if (err && (err != -EEXIST))
 			break;
 		err = 0;
+		cond_resched();
 	}
 	vmemmap_populate_print_last();
 out:
@@ -337,7 +338,7 @@ EXPORT_SYMBOL_GPL(__add_pages);
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
-static int find_smallest_section_pfn(int nid, struct zone *zone,
+static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
 				     unsigned long start_pfn,
 				     unsigned long end_pfn)
 {
@@ -362,7 +363,7 @@ static int find_smallest_section_pfn(int nid, struct zone *zone,
 }
 
 /* find the biggest valid pfn in the range [start_pfn, end_pfn). */
-static int find_biggest_section_pfn(int nid, struct zone *zone,
+static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
 				    unsigned long start_pfn,
 				    unsigned long end_pfn)
 {
@@ -550,7 +551,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
 		return ret;
 
 	scn_nr = __section_nr(ms);
-	start_pfn = section_nr_to_pfn(scn_nr);
+	start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
 	__remove_zone(zone, start_pfn);
 
 	sparse_remove_one_section(zone, ms, map_offset);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 006ba625c0b8..a2af6d58a68f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1920,8 +1920,11 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 	struct page *page;
 
 	page = __alloc_pages(gfp, order, nid);
-	if (page && page_to_nid(page) == nid)
-		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
+	if (page && page_to_nid(page) == nid) {
+		preempt_disable();
+		__inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT);
+		preempt_enable();
+	}
 	return page;
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 6954c1435833..e00814ca390e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2146,8 +2146,9 @@ static int migrate_vma_collect_hole(unsigned long start,
 	unsigned long addr;
 
 	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		migrate->src[migrate->npages++] = MIGRATE_PFN_MIGRATE;
+		migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
 		migrate->dst[migrate->npages] = 0;
+		migrate->npages++;
 		migrate->cpages++;
 	}
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 99736e026712..dee0f75c3013 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -40,6 +40,7 @@
 #include <linux/ratelimit.h>
 #include <linux/kthread.h>
 #include <linux/init.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlb.h>
 #include "internal.h"
@@ -495,6 +496,21 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	}
 
 	/*
+	 * If the mm has notifiers then we would need to invalidate them around
+	 * unmap_page_range and that is risky because notifiers can sleep and
+	 * what they do is basically undeterministic. So let's have a short
+	 * sleep to give the oom victim some more time.
+	 * TODO: we really want to get rid of this ugly hack and make sure that
+	 * notifiers cannot block for unbounded amount of time and add
+	 * mmu_notifier_invalidate_range_{start,end} around unmap_page_range
+	 */
+	if (mm_has_notifiers(mm)) {
+		up_read(&mm->mmap_sem);
+		schedule_timeout_idle(HZ);
+		goto unlock_oom;
+	}
+
+	/*
 	 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
 	 * work on the mm anymore. The check for MMF_OOM_SKIP must run
 	 * under mmap_sem for reading because it serializes against the
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c841af88836a..77e4d3c5c57b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1190,7 +1190,7 @@ static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
 }
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-static void init_reserved_page(unsigned long pfn)
+static void __meminit init_reserved_page(unsigned long pfn)
 {
 	pg_data_t *pgdat;
 	int nid, zid;
@@ -5367,6 +5367,7 @@ not_early:
 
 			__init_single_page(page, pfn, zone, nid);
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+			cond_resched();
 		} else {
 			__init_single_pfn(pfn, zone, nid);
 		}
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 6a03946469a9..53afbb919a1c 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -6,17 +6,6 @@
 
 #include "internal.h"
 
-static inline bool check_pmd(struct page_vma_mapped_walk *pvmw)
-{
-	pmd_t pmde;
-	/*
-	 * Make sure we don't re-load pmd between present and !trans_huge check.
-	 * We need a consistent view.
-	 */
-	pmde = READ_ONCE(*pvmw->pmd);
-	return pmd_present(pmde) && !pmd_trans_huge(pmde);
-}
-
 static inline bool not_found(struct page_vma_mapped_walk *pvmw)
 {
 	page_vma_mapped_walk_done(pvmw);
@@ -116,6 +105,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
+	pmd_t pmde;
 
 	/* The only possible pmd mapping has been handled on last iteration */
 	if (pvmw->pmd && !pvmw->pte)
@@ -148,7 +138,13 @@ restart:
 	if (!pud_present(*pud))
 		return false;
 	pvmw->pmd = pmd_offset(pud, pvmw->address);
-	if (pmd_trans_huge(*pvmw->pmd) || is_pmd_migration_entry(*pvmw->pmd)) {
+	/*
+	 * Make sure the pmd value isn't cached in a register by the
+	 * compiler and used as a stale value after we've observed a
+	 * subsequent update.
+	 */
+	pmde = READ_ONCE(*pvmw->pmd);
+	if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
 		pvmw->ptl = pmd_lock(mm, pvmw->pmd);
 		if (likely(pmd_trans_huge(*pvmw->pmd))) {
 			if (pvmw->flags & PVMW_MIGRATION)
@@ -167,17 +163,15 @@ restart:
 						return not_found(pvmw);
 					return true;
 				}
-			} else
-				WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
+			}
 			return not_found(pvmw);
 		} else {
 			/* THP pmd was split under us: handle on pte level */
 			spin_unlock(pvmw->ptl);
 			pvmw->ptl = NULL;
 		}
-	} else {
-		if (!check_pmd(pvmw))
-			return false;
-	}
+	} else if (!pmd_present(pmde)) {
+		return false;
+	}
 	if (!map_pte(pvmw))
 		goto next_pte;
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index 6142484e88f7..7a58460bfd27 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -73,7 +73,7 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
 				last_alloc + 1 : 0;
 
 	as_len = 0;
-	start = chunk->start_offset;
+	start = chunk->start_offset / PCPU_MIN_ALLOC_SIZE;
 
 	/*
 	 * If a bit is set in the allocation map, the bound_map identifies
diff --git a/mm/percpu.c b/mm/percpu.c
index 59d44d61f5f1..aa121cef76de 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -353,6 +353,8 @@ static void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off,
 					block->contig_hint_start);
 			return;
 		}
+		/* reset to satisfy the second predicate above */
+		block_off = 0;
 
 		*bits = block->right_free;
 		*bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free;
@@ -407,6 +409,8 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
 			*bit_off = pcpu_block_off_to_off(i, block->first_free);
 			return;
 		}
+		/* reset to satisfy the second predicate above */
+		block_off = 0;
 
 		*bit_off = ALIGN(PCPU_BITMAP_BLOCK_BITS - block->right_free,
 				 align);
diff --git a/mm/rodata_test.c b/mm/rodata_test.c
index 6bb4deb12e78..d908c8769b48 100644
--- a/mm/rodata_test.c
+++ b/mm/rodata_test.c
@@ -14,7 +14,7 @@
 #include <linux/uaccess.h>
 #include <asm/sections.h>
 
-const int rodata_test_data = 0xC3;
+static const int rodata_test_data = 0xC3;
 
 void rodata_test(void)
 {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 904a83be82de..80164599ca5d 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -165,9 +165,9 @@ static int init_memcg_params(struct kmem_cache *s,
 	if (!memcg_nr_cache_ids)
 		return 0;
 
-	arr = kzalloc(sizeof(struct memcg_cache_array) +
-		      memcg_nr_cache_ids * sizeof(void *),
-		      GFP_KERNEL);
+	arr = kvzalloc(sizeof(struct memcg_cache_array) +
+		       memcg_nr_cache_ids * sizeof(void *),
+		       GFP_KERNEL);
 	if (!arr)
 		return -ENOMEM;
 
@@ -178,15 +178,23 @@ static int init_memcg_params(struct kmem_cache *s,
 static void destroy_memcg_params(struct kmem_cache *s)
 {
 	if (is_root_cache(s))
-		kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
+		kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
+}
+
+static void free_memcg_params(struct rcu_head *rcu)
+{
+	struct memcg_cache_array *old;
+
+	old = container_of(rcu, struct memcg_cache_array, rcu);
+	kvfree(old);
 }
 
 static int update_memcg_params(struct kmem_cache *s, int new_array_size)
 {
 	struct memcg_cache_array *old, *new;
 
-	new = kzalloc(sizeof(struct memcg_cache_array) +
-		      new_array_size * sizeof(void *), GFP_KERNEL);
+	new = kvzalloc(sizeof(struct memcg_cache_array) +
+		       new_array_size * sizeof(void *), GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
 
@@ -198,7 +206,7 @@ static int update_memcg_params(struct kmem_cache *s, int new_array_size)
 
 	rcu_assign_pointer(s->memcg_params.memcg_caches, new);
 	if (old)
-		kfree_rcu(old, rcu);
+		call_rcu(&old->rcu, free_memcg_params);
 	return 0;
 }
 
diff --git a/mm/swap.c b/mm/swap.c
index 9295ae960d66..a77d68f2c1b6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -575,7 +575,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
 			    void *arg)
 {
 	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
-	    !PageUnevictable(page)) {
+	    !PageSwapCache(page) && !PageUnevictable(page)) {
 		bool active = PageActive(page);
 
 		del_page_from_lru_list(page, lruvec,
@@ -665,7 +665,7 @@ void deactivate_file_page(struct page *page)
 void mark_page_lazyfree(struct page *page)
 {
 	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
-	    !PageUnevictable(page)) {
+	    !PageSwapCache(page) && !PageUnevictable(page)) {
 		struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
 
 		get_page(page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 71ce2d1ccbf7..05b6803f0cce 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,10 +39,6 @@ struct address_space *swapper_spaces[MAX_SWAPFILES];
 static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
 bool swap_vma_readahead = true;
 
-#define SWAP_RA_MAX_ORDER_DEFAULT	3
-
-static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT;
-
 #define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
 #define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)
 #define SWAP_RA_HITS_MAX	SWAP_RA_HITS_MASK
@@ -242,6 +238,17 @@ int add_to_swap(struct page *page)
 		 * clear SWAP_HAS_CACHE flag.
 		 */
 		goto fail;
+	/*
+	 * Normally the page will be dirtied in unmap because its pte should be
+	 * dirty. A special case is MADV_FREE page. The page'e pte could have
+	 * dirty bit cleared but the page's SwapBacked bit is still set because
+	 * clearing the dirty bit and SwapBacked bit has no lock protected. For
+	 * such page, unmap will not set dirty bit for it, so page reclaim will
+	 * not write the page out. This can cause data corruption when the page
+	 * is swap in later. Always setting the dirty bit for the page solves
+	 * the problem.
+	 */
+	set_page_dirty(page);
 
 	return 1;
 
@@ -653,6 +660,13 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	pte_t *tpte;
 #endif
 
+	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
+			     SWAP_RA_ORDER_CEILING);
+	if (max_win == 1) {
+		swap_ra->win = 1;
+		return NULL;
+	}
+
 	faddr = vmf->address;
 	entry = pte_to_swp_entry(vmf->orig_pte);
 	if ((unlikely(non_swap_entry(entry))))
@@ -661,12 +675,6 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	if (page)
 		return page;
 
-	max_win = 1 << READ_ONCE(swap_ra_max_order);
-	if (max_win == 1) {
-		swap_ra->win = 1;
-		return NULL;
-	}
-
 	fpfn = PFN_DOWN(faddr);
 	swap_ra_info = GET_SWAP_RA_VAL(vma);
 	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
@@ -775,32 +783,8 @@ static struct kobj_attribute vma_ra_enabled_attr =
 	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
 	       vma_ra_enabled_store);
 
-static ssize_t vma_ra_max_order_show(struct kobject *kobj,
-				     struct kobj_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%d\n", swap_ra_max_order);
-}
-static ssize_t vma_ra_max_order_store(struct kobject *kobj,
-				      struct kobj_attribute *attr,
-				      const char *buf, size_t count)
-{
-	int err, v;
-
-	err = kstrtoint(buf, 10, &v);
-	if (err || v > SWAP_RA_ORDER_CEILING || v <= 0)
-		return -EINVAL;
-
-	swap_ra_max_order = v;
-
-	return count;
-}
-static struct kobj_attribute vma_ra_max_order_attr =
-	__ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show,
-	       vma_ra_max_order_store);
-
 static struct attribute *swap_attrs[] = {
 	&vma_ra_enabled_attr.attr,
-	&vma_ra_max_order_attr.attr,
 	NULL,
 };
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 8a43db6284eb..673942094328 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1695,11 +1695,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	for (i = 0; i < area->nr_pages; i++) {
 		struct page *page;
 
-		if (fatal_signal_pending(current)) {
-			area->nr_pages = i;
-			goto fail_no_warn;
-		}
-
 		if (node == NUMA_NO_NODE)
 			page = alloc_page(alloc_mask|highmem_mask);
 		else
@@ -1723,7 +1718,6 @@ fail:
 	warn_alloc(gfp_mask, NULL,
 		   "vmalloc: allocation failure, allocated %ld of %ld bytes",
 		   (area->nr_pages*PAGE_SIZE), area->size);
-fail_no_warn:
 	vfree(area->addr);
 	return NULL;
 }
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 486550df32be..b2ba2ba585f3 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -250,6 +250,7 @@ static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
 
 	WARN_ON(!list_empty(&zhdr->buddy));
 	set_bit(PAGE_STALE, &page->private);
+	clear_bit(NEEDS_COMPACTING, &page->private);
 	spin_lock(&pool->lock);
 	if (!list_empty(&page->lru))
 		list_del(&page->lru);
@@ -303,7 +304,6 @@ static void free_pages_work(struct work_struct *w)
 		list_del(&zhdr->buddy);
 		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
 			continue;
-		clear_bit(NEEDS_COMPACTING, &page->private);
 		spin_unlock(&pool->stale_lock);
 		cancel_work_sync(&zhdr->work);
 		free_z3fold_page(page);
@@ -624,10 +624,8 @@ lookup:
 		 * stale pages list. cancel_work_sync() can sleep so we must make
 		 * sure it won't be called in case we're in atomic context.
 		 */
-		if (zhdr && (can_sleep || !work_pending(&zhdr->work) ||
-		    !unlikely(work_busy(&zhdr->work)))) {
+		if (zhdr && (can_sleep || !work_pending(&zhdr->work))) {
 			list_del(&zhdr->buddy);
-			clear_bit(NEEDS_COMPACTING, &page->private);
 			spin_unlock(&pool->stale_lock);
 			if (can_sleep)
 				cancel_work_sync(&zhdr->work);
@@ -875,16 +873,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 				goto next;
 		}
 next:
+		spin_lock(&pool->lock);
 		if (test_bit(PAGE_HEADLESS, &page->private)) {
 			if (ret == 0) {
+				spin_unlock(&pool->lock);
 				free_z3fold_page(page);
 				return 0;
 			}
 		} else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
 			atomic64_dec(&pool->pages_nr);
+			spin_unlock(&pool->lock);
 			return 0;
 		}
-		spin_lock(&pool->lock);
 
 		/*
 		 * Add to the beginning of LRU.