Diffstat (limited to 'mm')
-rw-r--r--   mm/dmapool.c          |    2
-rw-r--r--   mm/huge_memory.c      |    7
-rw-r--r--   mm/hugetlb_cgroup.c   |    2
-rw-r--r--   mm/iov_iter.c         |   14
-rw-r--r--   mm/memblock.c         |    7
-rw-r--r--   mm/memcontrol.c       |  139
-rw-r--r--   mm/memory.c           |   11
-rw-r--r--   mm/migrate.c          |    5
-rw-r--r--   mm/mmap.c             |   16
-rw-r--r--   mm/nobootmem.c        |    2
-rw-r--r--   mm/page_alloc.c       |    7
-rw-r--r--   mm/percpu-vm.c        |   22
-rw-r--r--   mm/percpu.c           |    2
-rw-r--r--   mm/pgtable-generic.c  |    2
-rw-r--r--   mm/shmem.c            |    4
-rw-r--r--   mm/slab.c             |   15
-rw-r--r--   mm/zbud.c             |    1
-rw-r--r--   mm/zpool.c            |    2
-rw-r--r--   mm/zsmalloc.c         |    1
19 files changed, 182 insertions, 79 deletions
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 306baa594f95..ba8019b063e1 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -176,7 +176,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
 	if (list_empty(&dev->dma_pools) &&
 	    device_create_file(dev, &dev_attr_pools)) {
 		kfree(retval);
-		return NULL;
+		retval = NULL;
 	} else
 		list_add(&retval->pools, &dev->dma_pools);
 	mutex_unlock(&pools_lock);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d9a21d06b862..f8ffd9412ec5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1795,14 +1795,17 @@ static int __split_huge_page_map(struct page *page,
 	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
 		pte_t *pte, entry;
 		BUG_ON(PageCompound(page+i));
+		/*
+		 * Note that pmd_numa is not transferred deliberately
+		 * to avoid any possibility that pte_numa leaks to
+		 * a PROT_NONE VMA by accident.
+		 */
 		entry = mk_pte(page + i, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		if (!pmd_write(*pmd))
 			entry = pte_wrprotect(entry);
 		if (!pmd_young(*pmd))
 			entry = pte_mkold(entry);
-		if (pmd_numa(*pmd))
-			entry = pte_mknuma(entry);
 		pte = pte_offset_map(&_pmd, haddr);
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, haddr, pte, entry);
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 9eebfadeeee1..a67c26e0f360 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -217,7 +217,7 @@ void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 
 	if (hugetlb_cgroup_disabled())
 		return;
-	VM_BUG_ON(!spin_is_locked(&hugetlb_lock));
+	lockdep_assert_held(&hugetlb_lock);
 	h_cg = hugetlb_cgroup_from_page(page);
 	if (unlikely(!h_cg))
 		return;
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index ab88dc0ea1d3..9a09f2034fcc 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -310,7 +310,7 @@ void iov_iter_init(struct iov_iter *i, int direction,
 EXPORT_SYMBOL(iov_iter_init);
 
 static ssize_t get_pages_iovec(struct iov_iter *i,
-		   struct page **pages, unsigned maxpages,
+		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)
 {
 	size_t offset = i->iov_offset;
@@ -323,6 +323,8 @@ static ssize_t get_pages_iovec(struct iov_iter *i,
 	len = iov->iov_len - offset;
 	if (len > i->count)
 		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
 	addr = (unsigned long)iov->iov_base + offset;
 	len += *start = addr & (PAGE_SIZE - 1);
 	if (len > maxpages * PAGE_SIZE)
@@ -588,13 +590,15 @@ static unsigned long alignment_bvec(const struct iov_iter *i)
 }
 
 static ssize_t get_pages_bvec(struct iov_iter *i,
-		   struct page **pages, unsigned maxpages,
+		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)
 {
 	const struct bio_vec *bvec = i->bvec;
 	size_t len = bvec->bv_len - i->iov_offset;
 	if (len > i->count)
 		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
 	/* can't be more than PAGE_SIZE */
 	*start = bvec->bv_offset + i->iov_offset;
 
@@ -711,13 +715,13 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 EXPORT_SYMBOL(iov_iter_alignment);
 
 ssize_t iov_iter_get_pages(struct iov_iter *i,
-		   struct page **pages, unsigned maxpages,
+		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)
 {
 	if (i->type & ITER_BVEC)
-		return get_pages_bvec(i, pages, maxpages, start);
+		return get_pages_bvec(i, pages, maxsize, maxpages, start);
 	else
-		return get_pages_iovec(i, pages, maxpages, start);
+		return get_pages_iovec(i, pages, maxsize, maxpages, start);
 }
 EXPORT_SYMBOL(iov_iter_get_pages);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 6d2f219a48b0..6ecb0d937fb5 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -192,8 +192,7 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
 					phys_addr_t align, phys_addr_t start,
 					phys_addr_t end, int nid)
 {
-	int ret;
-	phys_addr_t kernel_end;
+	phys_addr_t kernel_end, ret;
 
 	/* pump up @end */
 	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
@@ -817,6 +816,10 @@ void __init_memblock __next_mem_range(u64 *idx, int nid,
 		if (nid != NUMA_NO_NODE && nid != m_nid)
 			continue;
 
+		/* skip hotpluggable memory regions if needed */
+		if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
+			continue;
+
 		if (!type_b) {
 			if (out_start)
 				*out_start = m_start;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ec4dcf1b9562..28928ce9b07f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -292,6 +292,9 @@ struct mem_cgroup {
 	/* vmpressure notifications */
 	struct vmpressure vmpressure;
 
+	/* css_online() has been completed */
+	int initialized;
+
 	/*
 	 * the counter to account for mem+swap usage.
 	 */
@@ -1099,10 +1102,21 @@ skip_node:
 	 * skipping css reference should be safe.
 	 */
 	if (next_css) {
-		if ((next_css == &root->css) ||
-		    ((next_css->flags & CSS_ONLINE) &&
-		     css_tryget_online(next_css)))
-			return mem_cgroup_from_css(next_css);
+		struct mem_cgroup *memcg = mem_cgroup_from_css(next_css);
+
+		if (next_css == &root->css)
+			return memcg;
+
+		if (css_tryget_online(next_css)) {
+			/*
+			 * Make sure the memcg is initialized:
+			 * mem_cgroup_css_online() orders the the
+			 * initialization against setting the flag.
+			 */
+			if (smp_load_acquire(&memcg->initialized))
+				return memcg;
+			css_put(next_css);
+		}
 
 		prev_css = next_css;
 		goto skip_node;
@@ -2534,6 +2548,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	unsigned long long size;
 	int ret = 0;
 
+	if (mem_cgroup_is_root(memcg))
+		goto done;
 retry:
 	if (consume_stock(memcg, nr_pages))
 		goto done;
@@ -2611,9 +2627,7 @@ nomem:
 	if (!(gfp_mask & __GFP_NOFAIL))
 		return -ENOMEM;
 bypass:
-	memcg = root_mem_cgroup;
-	ret = -EINTR;
-	goto retry;
+	return -EINTR;
 
 done_restock:
 	if (batch > nr_pages)
@@ -2626,6 +2640,9 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	unsigned long bytes = nr_pages * PAGE_SIZE;
 
+	if (mem_cgroup_is_root(memcg))
+		return;
+
 	res_counter_uncharge(&memcg->res, bytes);
 	if (do_swap_account)
 		res_counter_uncharge(&memcg->memsw, bytes);
@@ -2640,6 +2657,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
 {
 	unsigned long bytes = nr_pages * PAGE_SIZE;
 
+	if (mem_cgroup_is_root(memcg))
+		return;
+
 	res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
 	if (do_swap_account)
 		res_counter_uncharge_until(&memcg->memsw,
@@ -4093,6 +4113,46 @@ out:
 	return retval;
 }
 
+static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
+					       enum mem_cgroup_stat_index idx)
+{
+	struct mem_cgroup *iter;
+	long val = 0;
+
+	/* Per-cpu values can be negative, use a signed accumulator */
+	for_each_mem_cgroup_tree(iter, memcg)
+		val += mem_cgroup_read_stat(iter, idx);
+
+	if (val < 0) /* race ? */
+		val = 0;
+	return val;
+}
+
+static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
+{
+	u64 val;
+
+	if (!mem_cgroup_is_root(memcg)) {
+		if (!swap)
+			return res_counter_read_u64(&memcg->res, RES_USAGE);
+		else
+			return res_counter_read_u64(&memcg->memsw, RES_USAGE);
+	}
+
+	/*
+	 * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
+	 * as well as in MEM_CGROUP_STAT_RSS_HUGE.
+	 */
+	val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
+	val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
+
+	if (swap)
+		val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
+
+	return val << PAGE_SHIFT;
+}
+
+
 static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 			       struct cftype *cft)
 {
@@ -4102,8 +4162,12 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 
 	switch (type) {
 	case _MEM:
+		if (name == RES_USAGE)
+			return mem_cgroup_usage(memcg, false);
 		return res_counter_read_u64(&memcg->res, name);
 	case _MEMSWAP:
+		if (name == RES_USAGE)
+			return mem_cgroup_usage(memcg, true);
 		return res_counter_read_u64(&memcg->memsw, name);
 	case _KMEM:
 		return res_counter_read_u64(&memcg->kmem, name);
@@ -4572,10 +4636,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
 	if (!t)
 		goto unlock;
 
-	if (!swap)
-		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
-	else
-		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+	usage = mem_cgroup_usage(memcg, swap);
 
 	/*
 	 * current_threshold points to threshold just below or equal to usage.
@@ -4673,10 +4734,10 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
 
 	if (type == _MEM) {
 		thresholds = &memcg->thresholds;
-		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, false);
 	} else if (type == _MEMSWAP) {
 		thresholds = &memcg->memsw_thresholds;
-		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, true);
 	} else
 		BUG();
 
@@ -4762,10 +4823,10 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
 
 	if (type == _MEM) {
 		thresholds = &memcg->thresholds;
-		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, false);
 	} else if (type == _MEMSWAP) {
 		thresholds = &memcg->memsw_thresholds;
-		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, true);
 	} else
 		BUG();
 
@@ -5502,6 +5563,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
+	int ret;
 
 	if (css->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
@@ -5525,9 +5587,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 		 * core guarantees its existence.
 		 */
 	} else {
-		res_counter_init(&memcg->res, &root_mem_cgroup->res);
-		res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
-		res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
+		res_counter_init(&memcg->res, NULL);
+		res_counter_init(&memcg->memsw, NULL);
+		res_counter_init(&memcg->kmem, NULL);
 		/*
 		 * Deeper hierachy with use_hierarchy == false doesn't make
 		 * much sense so let cgroup subsystem know about this
@@ -5538,7 +5600,18 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	}
 	mutex_unlock(&memcg_create_mutex);
 
-	return memcg_init_kmem(memcg, &memory_cgrp_subsys);
+	ret = memcg_init_kmem(memcg, &memory_cgrp_subsys);
+	if (ret)
+		return ret;
+
+	/*
+	 * Make sure the memcg is initialized: mem_cgroup_iter()
+	 * orders reading memcg->initialized against its callers
+	 * reading the memcg members.
+	 */
+	smp_store_release(&memcg->initialized, 1);
+
+	return 0;
 }
 
 /*
@@ -5969,8 +6042,9 @@ static void __mem_cgroup_clear_mc(void)
 	/* we must fixup refcnts and charges */
 	if (mc.moved_swap) {
 		/* uncharge swap account from the old cgroup */
-		res_counter_uncharge(&mc.from->memsw,
-				     PAGE_SIZE * mc.moved_swap);
+		if (!mem_cgroup_is_root(mc.from))
+			res_counter_uncharge(&mc.from->memsw,
+					     PAGE_SIZE * mc.moved_swap);
 
 		for (i = 0; i < mc.moved_swap; i++)
 			css_put(&mc.from->css);
@@ -5979,8 +6053,9 @@ static void __mem_cgroup_clear_mc(void)
 		 * we charged both to->res and to->memsw, so we should
 		 * uncharge to->res.
 		 */
-		res_counter_uncharge(&mc.to->res,
-				     PAGE_SIZE * mc.moved_swap);
+		if (!mem_cgroup_is_root(mc.to))
+			res_counter_uncharge(&mc.to->res,
+					     PAGE_SIZE * mc.moved_swap);
 		/* we've already done css_get(mc.to) */
 		mc.moved_swap = 0;
 	}
@@ -6345,7 +6420,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
 	rcu_read_lock();
 	memcg = mem_cgroup_lookup(id);
 	if (memcg) {
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+		if (!mem_cgroup_is_root(memcg))
+			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 		mem_cgroup_swap_statistics(memcg, false);
 		css_put(&memcg->css);
 	}
@@ -6509,12 +6585,15 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 {
 	unsigned long flags;
 
-	if (nr_mem)
-		res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
-	if (nr_memsw)
-		res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
-
-	memcg_oom_recover(memcg);
+	if (!mem_cgroup_is_root(memcg)) {
+		if (nr_mem)
+			res_counter_uncharge(&memcg->res,
+					     nr_mem * PAGE_SIZE);
+		if (nr_memsw)
+			res_counter_uncharge(&memcg->memsw,
+					     nr_memsw * PAGE_SIZE);
+		memcg_oom_recover(memcg);
+	}
 
 	local_irq_save(flags);
 	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
diff --git a/mm/memory.c b/mm/memory.c
index ab3537bcfed2..e229970e4223 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -118,6 +118,8 @@ __setup("norandmaps", disable_randmaps);
 unsigned long zero_pfn __read_mostly;
 unsigned long highest_memmap_pfn __read_mostly;
 
+EXPORT_SYMBOL(zero_pfn);
+
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
  */
@@ -751,7 +753,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 	unsigned long pfn = pte_pfn(pte);
 
 	if (HAVE_PTE_SPECIAL) {
-		if (likely(!pte_special(pte) || pte_numa(pte)))
+		if (likely(!pte_special(pte)))
 			goto check_pfn;
 		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
 			return NULL;
@@ -777,15 +779,14 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		}
 	}
 
+	if (is_zero_pfn(pfn))
+		return NULL;
 check_pfn:
 	if (unlikely(pfn > highest_memmap_pfn)) {
 		print_bad_pte(vma, addr, pte, NULL);
 		return NULL;
 	}
 
-	if (is_zero_pfn(pfn))
-		return NULL;
-
 	/*
 	 * NOTE! We still have PageReserved() pages in the page tables.
 	 * eg. VDSO mappings can cause them to exist.
@@ -1126,7 +1127,7 @@ again:
 						   addr) != page->index) {
 				pte_t ptfile = pgoff_to_pte(page->index);
 				if (pte_soft_dirty(ptent))
-					pte_file_mksoft_dirty(ptfile);
+					ptfile = pte_file_mksoft_dirty(ptfile);
 				set_pte_at(mm, addr, pte, ptfile);
 			}
 			if (PageAnon(page))
diff --git a/mm/migrate.c b/mm/migrate.c
index f78ec9bd454d..2740360cd216 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -146,8 +146,11 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (pte_swp_soft_dirty(*ptep))
 		pte = pte_mksoft_dirty(pte);
+
+	/* Recheck VMA as permissions can change since migration started */
 	if (is_write_migration_entry(entry))
-		pte = pte_mkwrite(pte);
+		pte = maybe_mkwrite(pte, vma);
+
 #ifdef CONFIG_HUGETLB_PAGE
 	if (PageHuge(new)) {
 		pte = pte_mkhuge(pte);
diff --git a/mm/mmap.c b/mm/mmap.c
index c1f2ea4a0b99..c0a3637cdb64 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -369,20 +369,20 @@ static int browse_rb(struct rb_root *root)
 		struct vm_area_struct *vma;
 		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
 		if (vma->vm_start < prev) {
-			pr_info("vm_start %lx prev %lx\n", vma->vm_start, prev);
+			pr_emerg("vm_start %lx prev %lx\n", vma->vm_start, prev);
 			bug = 1;
 		}
 		if (vma->vm_start < pend) {
-			pr_info("vm_start %lx pend %lx\n", vma->vm_start, pend);
+			pr_emerg("vm_start %lx pend %lx\n", vma->vm_start, pend);
 			bug = 1;
 		}
 		if (vma->vm_start > vma->vm_end) {
-			pr_info("vm_end %lx < vm_start %lx\n",
+			pr_emerg("vm_end %lx < vm_start %lx\n",
 				vma->vm_end, vma->vm_start);
 			bug = 1;
 		}
 		if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
-			pr_info("free gap %lx, correct %lx\n",
+			pr_emerg("free gap %lx, correct %lx\n",
 				vma->rb_subtree_gap,
 				vma_compute_subtree_gap(vma));
 			bug = 1;
@@ -396,7 +396,7 @@ static int browse_rb(struct rb_root *root)
 	for (nd = pn; nd; nd = rb_prev(nd))
 		j++;
 	if (i != j) {
-		pr_info("backwards %d, forwards %d\n", j, i);
+		pr_emerg("backwards %d, forwards %d\n", j, i);
 		bug = 1;
 	}
 	return bug ? -1 : i;
@@ -431,17 +431,17 @@ static void validate_mm(struct mm_struct *mm)
 		i++;
 	}
 	if (i != mm->map_count) {
-		pr_info("map_count %d vm_next %d\n", mm->map_count, i);
+		pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
 		bug = 1;
 	}
 	if (highest_address != mm->highest_vm_end) {
-		pr_info("mm->highest_vm_end %lx, found %lx\n",
+		pr_emerg("mm->highest_vm_end %lx, found %lx\n",
 			mm->highest_vm_end, highest_address);
 		bug = 1;
 	}
 	i = browse_rb(&mm->mm_rb);
 	if (i != mm->map_count) {
-		pr_info("map_count %d rb %d\n", mm->map_count, i);
+		pr_emerg("map_count %d rb %d\n", mm->map_count, i);
 		bug = 1;
 	}
 	BUG_ON(bug);
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 7ed58602e71b..7c7ab32ee503 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -119,6 +119,8 @@ static unsigned long __init free_low_memory_core_early(void)
 	phys_addr_t start, end;
 	u64 i;
 
+	memblock_clear_hotplug(0, -1);
+
 	for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL)
 		count += __free_memory_core(start, end);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 18cee0d4c8a2..eee961958021 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1612,7 +1612,7 @@ again:
 	}
 
 	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
-	if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 &&
+	if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 &&
 	    !zone_is_fair_depleted(zone))
 		zone_set_flag(zone, ZONE_FAIR_DEPLETED);
 
@@ -5701,9 +5701,8 @@ static void __setup_per_zone_wmarks(void)
 	zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
 
 	__mod_zone_page_state(zone, NR_ALLOC_BATCH,
-			      high_wmark_pages(zone) -
-			      low_wmark_pages(zone) -
-			      zone_page_state(zone, NR_ALLOC_BATCH));
+			      high_wmark_pages(zone) - low_wmark_pages(zone) -
+			      atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
 
 	setup_zone_migrate_reserve(zone);
 	spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 3707c71ae4cd..51108165f829 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -108,7 +108,7 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
 			    int page_start, int page_end)
 {
 	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
-	unsigned int cpu;
+	unsigned int cpu, tcpu;
 	int i;
 
 	for_each_possible_cpu(cpu) {
@@ -116,14 +116,23 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
 			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
 
 			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
-			if (!*pagep) {
-				pcpu_free_pages(chunk, pages, populated,
-						page_start, page_end);
-				return -ENOMEM;
-			}
+			if (!*pagep)
+				goto err;
 		}
 	}
 	return 0;
+
+err:
+	while (--i >= page_start)
+		__free_page(pages[pcpu_page_idx(cpu, i)]);
+
+	for_each_possible_cpu(tcpu) {
+		if (tcpu == cpu)
+			break;
+		for (i = page_start; i < page_end; i++)
+			__free_page(pages[pcpu_page_idx(tcpu, i)]);
+	}
+	return -ENOMEM;
 }
 
 /**
@@ -263,6 +272,7 @@ err:
 		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
 				   page_end - page_start);
 	}
+	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
 	return err;
 }
 
diff --git a/mm/percpu.c b/mm/percpu.c
index 2139e30a4b44..da997f9800bd 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1932,6 +1932,8 @@ void __init setup_per_cpu_areas(void)
 
 	if (pcpu_setup_first_chunk(ai, fc) < 0)
 		panic("Failed to initialize percpu areas.");
+
+	pcpu_free_alloc_info(ai);
 }
 
 #endif /* CONFIG_SMP */
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index a8b919925934..dfb79e028ecb 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -195,7 +195,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 	pmd_t entry = *pmdp;
 	if (pmd_numa(entry))
 		entry = pmd_mknonnuma(entry);
-	set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(*pmdp));
+	set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry));
 	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/mm/shmem.c b/mm/shmem.c
index 0e5fb225007c..469f90d56051 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2367,8 +2367,10 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
 
 	if (new_dentry->d_inode) {
 		(void) shmem_unlink(new_dir, new_dentry);
-		if (they_are_dirs)
+		if (they_are_dirs) {
+			drop_nlink(new_dentry->d_inode);
 			drop_nlink(old_dir);
+		}
 	} else if (they_are_dirs) {
 		drop_nlink(old_dir);
 		inc_nlink(new_dir);
diff --git a/mm/slab.c b/mm/slab.c
index a467b308c682..7c52b3890d25 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2124,7 +2124,8 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 int
 __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 {
-	size_t left_over, freelist_size, ralign;
+	size_t left_over, freelist_size;
+	size_t ralign = BYTES_PER_WORD;
 	gfp_t gfp;
 	int err;
 	size_t size = cachep->size;
@@ -2157,14 +2158,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 		size &= ~(BYTES_PER_WORD - 1);
 	}
 
-	/*
-	 * Redzoning and user store require word alignment or possibly larger.
-	 * Note this will be overridden by architecture or caller mandated
-	 * alignment if either is greater than BYTES_PER_WORD.
-	 */
-	if (flags & SLAB_STORE_USER)
-		ralign = BYTES_PER_WORD;
-
 	if (flags & SLAB_RED_ZONE) {
 		ralign = REDZONE_ALIGN;
 		/* If redzoning, ensure that the second redzone is suitably
@@ -2994,7 +2987,7 @@ out:
 
 #ifdef CONFIG_NUMA
 /*
- * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set.
+ * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set.
  *
  * If we are in_interrupt, then process context, including cpusets and
  * mempolicy, may not apply and should not be used for allocation policy.
@@ -3226,7 +3219,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
 {
 	void *objp;
 
-	if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
+	if (current->mempolicy || cpuset_do_slab_mem_spread()) {
 		objp = alternate_node_alloc(cache, flags);
 		if (objp)
 			goto out;
diff --git a/mm/zbud.c b/mm/zbud.c
index a05790b1915e..f26e7fcc7fa2 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -195,6 +195,7 @@ static struct zpool_driver zbud_zpool_driver = {
 	.total_size = zbud_zpool_total_size,
 };
 
+MODULE_ALIAS("zpool-zbud");
 #endif /* CONFIG_ZPOOL */
 
 /*****************
diff --git a/mm/zpool.c b/mm/zpool.c
index e40612a1df00..739cdf0d183a 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -150,7 +150,7 @@ struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops)
 	driver = zpool_get_driver(type);
 
 	if (!driver) {
-		request_module(type);
+		request_module("zpool-%s", type);
 		driver = zpool_get_driver(type);
 	}
 
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 4e2fc83cb394..94f38fac5e81 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -315,6 +315,7 @@ static struct zpool_driver zs_zpool_driver = {
 	.total_size = zs_zpool_total_size,
 };
 
+MODULE_ALIAS("zpool-zsmalloc");
 #endif /* CONFIG_ZPOOL */
 
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */