Diffstat (limited to 'mm')
-rw-r--r--  mm/balloon_compaction.c    2
-rw-r--r--  mm/compaction.c            3
-rw-r--r--  mm/huge_memory.c          15
-rw-r--r--  mm/memcontrol.c          105
-rw-r--r--  mm/memory.c                1
-rw-r--r--  mm/memory_hotplug.c        5
-rw-r--r--  mm/mmap.c                  8
-rw-r--r--  mm/oom_kill.c             17
-rw-r--r--  mm/page-writeback.c       43
-rw-r--r--  mm/page_alloc.c            8
-rw-r--r--  mm/page_cgroup.c           1
-rw-r--r--  mm/rmap.c                 88
-rw-r--r--  mm/shmem.c                36
-rw-r--r--  mm/slab_common.c          10
-rw-r--r--  mm/truncate.c             57
15 files changed, 259 insertions, 140 deletions
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index b3cbe19f71b5..fcad8322ef36 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -68,11 +68,13 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
 		 * to be released by the balloon driver.
 		 */
 		if (trylock_page(page)) {
+#ifdef CONFIG_BALLOON_COMPACTION
 			if (!PagePrivate(page)) {
 				/* raced with isolation */
 				unlock_page(page);
 				continue;
 			}
+#endif
 			spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 			balloon_page_delete(page);
 			__count_vm_event(BALLOON_DEFLATE);
diff --git a/mm/compaction.c b/mm/compaction.c
index edba18aed173..ec74cf0123ef 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -784,6 +784,9 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
 			cc->nr_migratepages = 0;
 			break;
 		}
+
+		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
+			break;
 	}
 	acct_isolated(cc->zone, cc);
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 74c78aa8bc2f..de984159cf0b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -200,7 +200,7 @@ retry:
 	preempt_disable();
 	if (cmpxchg(&huge_zero_page, NULL, zero_page)) {
 		preempt_enable();
-		__free_page(zero_page);
+		__free_pages(zero_page, compound_order(zero_page));
 		goto retry;
 	}
 
@@ -232,7 +232,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
 	if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
 		struct page *zero_page = xchg(&huge_zero_page, NULL);
 		BUG_ON(zero_page == NULL);
-		__free_page(zero_page);
+		__free_pages(zero_page, compound_order(zero_page));
 		return HPAGE_PMD_NR;
 	}
 
@@ -803,7 +803,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_FALLBACK;
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;
-	if (unlikely(khugepaged_enter(vma)))
+	if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
 		return VM_FAULT_OOM;
 	if (!(flags & FAULT_FLAG_WRITE) &&
 			transparent_hugepage_use_zero_page()) {
@@ -1970,7 +1970,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 		 * register it here without waiting a page fault that
 		 * may not happen any time soon.
 		 */
-		if (unlikely(khugepaged_enter_vma_merge(vma)))
+		if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
 			return -ENOMEM;
 		break;
 	case MADV_NOHUGEPAGE:
@@ -2071,7 +2071,8 @@ int __khugepaged_enter(struct mm_struct *mm)
 	return 0;
 }
 
-int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+			       unsigned long vm_flags)
 {
 	unsigned long hstart, hend;
 	if (!vma->anon_vma)
@@ -2083,11 +2084,11 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
 	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
+	VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
-		return khugepaged_enter(vma);
+		return khugepaged_enter(vma, vm_flags);
 	return 0;
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 23976fd885fd..d6ac0e33e150 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1536,12 +1536,8 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
  * start move here.
  */
 
-/* for quick checking without looking up memcg */
-atomic_t memcg_moving __read_mostly;
-
 static void mem_cgroup_start_move(struct mem_cgroup *memcg)
 {
-	atomic_inc(&memcg_moving);
 	atomic_inc(&memcg->moving_account);
 	synchronize_rcu();
 }
@@ -1552,10 +1548,8 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg)
 	 * Now, mem_cgroup_clear_mc() may call this function with NULL.
 	 * We check NULL in callee rather than caller.
 	 */
-	if (memcg) {
-		atomic_dec(&memcg_moving);
+	if (memcg)
 		atomic_dec(&memcg->moving_account);
-	}
 }
 
 /*
@@ -2204,41 +2198,52 @@ cleanup:
 	return true;
 }
 
-/*
- * Used to update mapped file or writeback or other statistics.
+/**
+ * mem_cgroup_begin_page_stat - begin a page state statistics transaction
+ * @page: page that is going to change accounted state
+ * @locked: &memcg->move_lock slowpath was taken
+ * @flags: IRQ-state flags for &memcg->move_lock
  *
- * Notes: Race condition
+ * This function must mark the beginning of an accounted page state
+ * change to prevent double accounting when the page is concurrently
+ * being moved to another memcg:
  *
- * Charging occurs during page instantiation, while the page is
- * unmapped and locked in page migration, or while the page table is
- * locked in THP migration. No race is possible.
+ *   memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
+ *   if (TestClearPageState(page))
+ *     mem_cgroup_update_page_stat(memcg, state, -1);
+ *   mem_cgroup_end_page_stat(memcg, locked, flags);
  *
- * Uncharge happens to pages with zero references, no race possible.
+ * The RCU lock is held throughout the transaction. The fast path can
+ * get away without acquiring the memcg->move_lock (@locked is false)
+ * because page moving starts with an RCU grace period.
  *
- * Charge moving between groups is protected by checking mm->moving
- * account and taking the move_lock in the slowpath.
+ * The RCU lock also protects the memcg from being freed when the page
+ * state that is going to change is the only thing preventing the page
+ * from being uncharged. E.g. end-writeback clearing PageWriteback(),
+ * which allows migration to go ahead and uncharge the page before the
+ * account transaction might be complete.
  */
-
-void __mem_cgroup_begin_update_page_stat(struct page *page,
-				bool *locked, unsigned long *flags)
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
+					      bool *locked,
+					      unsigned long *flags)
 {
 	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
 
+	rcu_read_lock();
+
+	if (mem_cgroup_disabled())
+		return NULL;
+
 	pc = lookup_page_cgroup(page);
 again:
 	memcg = pc->mem_cgroup;
 	if (unlikely(!memcg || !PageCgroupUsed(pc)))
-		return;
-	/*
-	 * If this memory cgroup is not under account moving, we don't
-	 * need to take move_lock_mem_cgroup(). Because we already hold
-	 * rcu_read_lock(), any calls to move_account will be delayed until
-	 * rcu_read_unlock().
-	 */
-	VM_BUG_ON(!rcu_read_lock_held());
+		return NULL;
+
+	*locked = false;
 	if (atomic_read(&memcg->moving_account) <= 0)
-		return;
+		return memcg;
 
 	move_lock_mem_cgroup(memcg, flags);
 	if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
@@ -2246,36 +2251,40 @@ again:
 		goto again;
 	}
 	*locked = true;
+
+	return memcg;
 }
 
-void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
+/**
+ * mem_cgroup_end_page_stat - finish a page state statistics transaction
+ * @memcg: the memcg that was accounted against
+ * @locked: value received from mem_cgroup_begin_page_stat()
+ * @flags: value received from mem_cgroup_begin_page_stat()
+ */
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
+			      unsigned long flags)
 {
-	struct page_cgroup *pc = lookup_page_cgroup(page);
+	if (memcg && locked)
+		move_unlock_mem_cgroup(memcg, &flags);
 
-	/*
-	 * It's guaranteed that pc->mem_cgroup never changes while
-	 * lock is held because a routine modifies pc->mem_cgroup
-	 * should take move_lock_mem_cgroup().
-	 */
-	move_unlock_mem_cgroup(pc->mem_cgroup, flags);
+	rcu_read_unlock();
 }
 
-void mem_cgroup_update_page_stat(struct page *page,
+/**
+ * mem_cgroup_update_page_stat - update page state statistics
+ * @memcg: memcg to account against
+ * @idx: page state item to account
+ * @val: number of pages (positive or negative)
+ *
+ * See mem_cgroup_begin_page_stat() for locking requirements.
+ */
+void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
 				 enum mem_cgroup_stat_index idx, int val)
 {
-	struct mem_cgroup *memcg;
-	struct page_cgroup *pc = lookup_page_cgroup(page);
-	unsigned long uninitialized_var(flags);
-
-	if (mem_cgroup_disabled())
-		return;
-
 	VM_BUG_ON(!rcu_read_lock_held());
-	memcg = pc->mem_cgroup;
-	if (unlikely(!memcg || !PageCgroupUsed(pc)))
-		return;
 
-	this_cpu_add(memcg->stat->count[idx], val);
+	if (memcg)
+		this_cpu_add(memcg->stat->count[idx], val);
 }
 
 /*
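For illustration only, not part of this patch: a minimal sketch of a page-state update site using the new transaction API, following the pattern documented in the kernel-doc above (the writeback flag is used here purely as an example):

/* Minimal usage sketch of the new API (mirrors the documented pattern).
 * Everything outside the begin/end pair is ordinary page-flag handling.
 */
static void example_end_writeback_accounting(struct page *page)
{
	struct mem_cgroup *memcg;
	unsigned long flags;
	bool locked;

	memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
	if (TestClearPageWriteback(page))
		mem_cgroup_update_page_stat(memcg,
					    MEM_CGROUP_STAT_WRITEBACK, -1);
	mem_cgroup_end_page_stat(memcg, locked, flags);
}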
diff --git a/mm/memory.c b/mm/memory.c
index 1cc6bfbd872e..3e503831e042 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1147,6 +1147,7 @@ again:
 				print_bad_pte(vma, addr, ptent, page);
 			if (unlikely(!__tlb_remove_page(tlb, page))) {
 				force_flush = 1;
+				addr += PAGE_SIZE;
 				break;
 			}
 			continue;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 29d8693d0c61..252e1dbbed86 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1912,7 +1912,6 @@ void try_offline_node(int nid)
 	unsigned long start_pfn = pgdat->node_start_pfn;
 	unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
 	unsigned long pfn;
-	struct page *pgdat_page = virt_to_page(pgdat);
 	int i;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
@@ -1941,10 +1940,6 @@ void try_offline_node(int nid)
 	node_set_offline(nid);
 	unregister_one_node(nid);
 
-	if (!PageSlab(pgdat_page) && !PageCompound(pgdat_page))
-		/* node data is allocated from boot memory */
-		return;
-
 	/* free waittable in each zone */
 	for (i = 0; i < MAX_NR_ZONES; i++) {
 		struct zone *zone = pgdat->node_zones + i;
diff --git a/mm/mmap.c b/mm/mmap.c
index 7f855206e7fb..87e82b38453c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1080,7 +1080,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 					end, prev->vm_pgoff, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(prev);
+		khugepaged_enter_vma_merge(prev, vm_flags);
 		return prev;
 	}
 
@@ -1099,7 +1099,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 					next->vm_pgoff - pglen, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(area);
+		khugepaged_enter_vma_merge(area, vm_flags);
 		return area;
 	}
 
@@ -2208,7 +2208,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(vma->vm_mm);
 	return error;
 }
@@ -2277,7 +2277,7 @@ int expand_downwards(struct vm_area_struct *vma,
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(vma->vm_mm);
 	return error;
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index bbf405a3a18f..5340f6b91312 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -404,6 +404,23 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 		dump_tasks(memcg, nodemask);
 }
 
+/*
+ * Number of OOM killer invocations (including memcg OOM killer).
+ * Primarily used by PM freezer to check for potential races with
+ * OOM killed frozen task.
+ */
+static atomic_t oom_kills = ATOMIC_INIT(0);
+
+int oom_kills_count(void)
+{
+	return atomic_read(&oom_kills);
+}
+
+void note_oom_kill(void)
+{
+	atomic_inc(&oom_kills);
+}
+
 #define K(x) ((x) << (PAGE_SHIFT-10))
 /*
  * Must be called while holding a reference to p, which will be released upon
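For illustration only, not part of this patch: the counter is meant to be sampled around an operation that must not race with the OOM killer. A hedged sketch of that pattern (this is not the actual freezer code in kernel/power/):

/* Illustrative pattern only: snapshot the counter, do the racy work,
 * and treat a changed count as "an OOM kill happened meanwhile".
 */
static int example_detect_oom_race(void)
{
	int before = oom_kills_count();

	/* ... freeze tasks, or other work that must not race with OOM ... */

	if (oom_kills_count() != before)
		return -EBUSY;	/* retry or abort */
	return 0;
}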
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ff24c9d83112..19ceae87522d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2116,23 +2116,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 EXPORT_SYMBOL(account_page_dirtied);
 
 /*
- * Helper function for set_page_writeback family.
- *
- * The caller must hold mem_cgroup_begin/end_update_page_stat() lock
- * while calling this function.
- * See test_set_page_writeback for example.
- *
- * NOTE: Unlike account_page_dirtied this does not rely on being atomic
- * wrt interrupts.
- */
-void account_page_writeback(struct page *page)
-{
-	mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
-	inc_zone_page_state(page, NR_WRITEBACK);
-}
-EXPORT_SYMBOL(account_page_writeback);
-
-/*
  * For address_spaces which do not use buffers. Just tag the page as dirty in
  * its radix tree.
  *
@@ -2344,11 +2327,12 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
 int test_clear_page_writeback(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
-	int ret;
-	bool locked;
 	unsigned long memcg_flags;
+	struct mem_cgroup *memcg;
+	bool locked;
+	int ret;
 
-	mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
+	memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
 	if (mapping) {
 		struct backing_dev_info *bdi = mapping->backing_dev_info;
 		unsigned long flags;
@@ -2369,22 +2353,23 @@ int test_clear_page_writeback(struct page *page)
 		ret = TestClearPageWriteback(page);
 	}
 	if (ret) {
-		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
+		mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
 		dec_zone_page_state(page, NR_WRITEBACK);
 		inc_zone_page_state(page, NR_WRITTEN);
 	}
-	mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
+	mem_cgroup_end_page_stat(memcg, locked, memcg_flags);
 	return ret;
 }
 
 int __test_set_page_writeback(struct page *page, bool keep_write)
 {
 	struct address_space *mapping = page_mapping(page);
-	int ret;
-	bool locked;
 	unsigned long memcg_flags;
+	struct mem_cgroup *memcg;
+	bool locked;
+	int ret;
 
-	mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
+	memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
 	if (mapping) {
 		struct backing_dev_info *bdi = mapping->backing_dev_info;
 		unsigned long flags;
@@ -2410,9 +2395,11 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 	} else {
 		ret = TestSetPageWriteback(page);
 	}
-	if (!ret)
-		account_page_writeback(page);
-	mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
+	if (!ret) {
+		mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
+		inc_zone_page_state(page, NR_WRITEBACK);
+	}
+	mem_cgroup_end_page_stat(memcg, locked, memcg_flags);
 	return ret;
 
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 736d8e1b6381..9cd36b822444 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2252,6 +2252,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
+	 * PM-freezer should be notified that there might be an OOM killer on
+	 * its way to kill and wake somebody up. This is too early and we might
+	 * end up not killing anything but false positives are acceptable.
+	 * See freeze_processes.
+	 */
+	note_oom_kill();
+
+	/*
 	 * Go through the zonelist yet one more time, keep very high watermark
 	 * here, this is only to catch a parallel oom killing, we must fail if
 	 * we're still under heavy pressure.
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 3708264d2833..5331c2bd85a2 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -171,6 +171,7 @@ static void free_page_cgroup(void *addr)
 			sizeof(struct page_cgroup) * PAGES_PER_SECTION;
 
 		BUG_ON(PageReserved(page));
+		kmemleak_free(addr);
 		free_pages_exact(addr, table_size);
 	}
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 116a5053415b..19886fb2f13a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1042,15 +1042,46 @@ void page_add_new_anon_rmap(struct page *page,
  */
 void page_add_file_rmap(struct page *page)
 {
-	bool locked;
+	struct mem_cgroup *memcg;
 	unsigned long flags;
+	bool locked;
 
-	mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+	memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
 	if (atomic_inc_and_test(&page->_mapcount)) {
 		__inc_zone_page_state(page, NR_FILE_MAPPED);
-		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+		mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
 	}
-	mem_cgroup_end_update_page_stat(page, &locked, &flags);
+	mem_cgroup_end_page_stat(memcg, locked, flags);
+}
+
+static void page_remove_file_rmap(struct page *page)
+{
+	struct mem_cgroup *memcg;
+	unsigned long flags;
+	bool locked;
+
+	memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
+
+	/* page still mapped by someone else? */
+	if (!atomic_add_negative(-1, &page->_mapcount))
+		goto out;
+
+	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
+	if (unlikely(PageHuge(page)))
+		goto out;
+
+	/*
+	 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
+	 * these counters are not modified in interrupt context, and
+	 * pte lock(a spinlock) is held, which implies preemption disabled.
+	 */
+	__dec_zone_page_state(page, NR_FILE_MAPPED);
+	mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
+
+	if (unlikely(PageMlocked(page)))
+		clear_page_mlock(page);
+out:
+	mem_cgroup_end_page_stat(memcg, locked, flags);
 }
 
 /**
@@ -1061,46 +1092,33 @@ void page_add_file_rmap(struct page *page)
  */
 void page_remove_rmap(struct page *page)
 {
-	bool anon = PageAnon(page);
-	bool locked;
-	unsigned long flags;
-
-	/*
-	 * The anon case has no mem_cgroup page_stat to update; but may
-	 * uncharge_page() below, where the lock ordering can deadlock if
-	 * we hold the lock against page_stat move: so avoid it on anon.
-	 */
-	if (!anon)
-		mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+	if (!PageAnon(page)) {
+		page_remove_file_rmap(page);
+		return;
+	}
 
 	/* page still mapped by someone else? */
 	if (!atomic_add_negative(-1, &page->_mapcount))
-		goto out;
+		return;
+
+	/* Hugepages are not counted in NR_ANON_PAGES for now. */
+	if (unlikely(PageHuge(page)))
+		return;
 
 	/*
-	 * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
-	 * and not charged by memcg for now.
-	 *
 	 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
 	 * these counters are not modified in interrupt context, and
-	 * these counters are not modified in interrupt context, and
 	 * pte lock(a spinlock) is held, which implies preemption disabled.
 	 */
-	if (unlikely(PageHuge(page)))
-		goto out;
-	if (anon) {
-		if (PageTransHuge(page))
-			__dec_zone_page_state(page,
-					      NR_ANON_TRANSPARENT_HUGEPAGES);
-		__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-				-hpage_nr_pages(page));
-	} else {
-		__dec_zone_page_state(page, NR_FILE_MAPPED);
-		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
-		mem_cgroup_end_update_page_stat(page, &locked, &flags);
-	}
+	if (PageTransHuge(page))
+		__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+
+	__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
+			      -hpage_nr_pages(page));
+
 	if (unlikely(PageMlocked(page)))
 		clear_page_mlock(page);
+
 	/*
 	 * It would be tidy to reset the PageAnon mapping here,
 	 * but that might overwrite a racing page_add_anon_rmap
@@ -1110,10 +1128,6 @@ void page_remove_rmap(struct page *page)
 	 * Leaving it set also helps swapoff to reinstate ptes
 	 * faster for those pages still in swapcache.
 	 */
-	return;
-out:
-	if (!anon)
-		mem_cgroup_end_update_page_stat(page, &locked, &flags);
 }
 
 /*
diff --git a/mm/shmem.c b/mm/shmem.c
index cd6fc7590e54..185836ba53ef 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2345,6 +2345,32 @@ static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, stru
 	return 0;
 }
 
+static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
+{
+	struct dentry *whiteout;
+	int error;
+
+	whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
+	if (!whiteout)
+		return -ENOMEM;
+
+	error = shmem_mknod(old_dir, whiteout,
+			    S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+	dput(whiteout);
+	if (error)
+		return error;
+
+	/*
+	 * Cheat and hash the whiteout while the old dentry is still in
+	 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
+	 *
+	 * d_lookup() will consistently find one of them at this point,
+	 * not sure which one, but that isn't even important.
+	 */
+	d_rehash(whiteout);
+	return 0;
+}
+
 /*
  * The VFS layer already does all the dentry stuff for rename,
  * we just have to decrement the usage count for the target if
@@ -2356,7 +2382,7 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
 	struct inode *inode = old_dentry->d_inode;
 	int they_are_dirs = S_ISDIR(inode->i_mode);
 
-	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 		return -EINVAL;
 
 	if (flags & RENAME_EXCHANGE)
@@ -2365,6 +2391,14 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
 	if (!simple_empty(new_dentry))
 		return -ENOTEMPTY;
 
+	if (flags & RENAME_WHITEOUT) {
+		int error;
+
+		error = shmem_whiteout(old_dir, old_dentry);
+		if (error)
+			return error;
+	}
+
 	if (new_dentry->d_inode) {
 		(void) shmem_unlink(new_dir, new_dentry);
 		if (they_are_dirs) {
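For illustration only, not part of this patch: RENAME_WHITEOUT is requested through renameat2(); overlayfs is the intended in-kernel user, but a hedged userspace sketch shows the flag's effect on a tmpfs mount (the raw syscall is used since the libc wrapper may be missing on older systems):

/* Userspace sketch: rename "old" to "new" and leave a whiteout
 * (a 0/0 character device) in place of "old". Paths are examples.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef RENAME_WHITEOUT
#define RENAME_WHITEOUT	(1 << 2)
#endif

int main(void)
{
	if (syscall(SYS_renameat2, AT_FDCWD, "/dev/shm/old",
		    AT_FDCWD, "/dev/shm/new", RENAME_WHITEOUT) != 0)
		perror("renameat2");
	return 0;
}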
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3a6e0cfdf03a..406944207b61 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -93,16 +93,6 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
 			       s->object_size);
 			continue;
 		}
-
-#if !defined(CONFIG_SLUB)
-		if (!strcmp(s->name, name)) {
-			pr_err("%s (%s): Cache name already exists.\n",
-			       __func__, name);
-			dump_stack();
-			s = NULL;
-			return -EINVAL;
-		}
-#endif
 	}
 
 	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
diff --git a/mm/truncate.c b/mm/truncate.c
index 96d167372d89..261eaf6e5a19 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
 #include <linux/buffer_head.h>	/* grr. try_to_release_page,
 				   do_invalidatepage */
 #include <linux/cleancache.h>
+#include <linux/rmap.h>
 #include "internal.h"
 
 static void clear_exceptional_entry(struct address_space *mapping,
@@ -719,12 +720,68 @@ EXPORT_SYMBOL(truncate_pagecache);
  */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
+	loff_t oldsize = inode->i_size;
+
 	i_size_write(inode, newsize);
+	if (newsize > oldsize)
+		pagecache_isize_extended(inode, oldsize, newsize);
 	truncate_pagecache(inode, newsize);
 }
 EXPORT_SYMBOL(truncate_setsize);
 
 /**
+ * pagecache_isize_extended - update pagecache after extension of i_size
+ * @inode:	inode for which i_size was extended
+ * @from:	original inode size
+ * @to:		new inode size
+ *
+ * Handle extension of inode size either caused by extending truncate or by
+ * write starting after current i_size. We mark the page straddling current
+ * i_size RO so that page_mkwrite() is called on the nearest write access to
+ * the page. This way filesystem can be sure that page_mkwrite() is called on
+ * the page before user writes to the page via mmap after the i_size has been
+ * changed.
+ *
+ * The function must be called after i_size is updated so that page fault
+ * coming after we unlock the page will already see the new i_size.
+ * The function must be called while we still hold i_mutex - this not only
+ * makes sure i_size is stable but also that userspace cannot observe new
+ * i_size value before we are prepared to store mmap writes at new inode size.
+ */
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
+{
+	int bsize = 1 << inode->i_blkbits;
+	loff_t rounded_from;
+	struct page *page;
+	pgoff_t index;
+
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
+	WARN_ON(to > inode->i_size);
+
+	if (from >= to || bsize == PAGE_CACHE_SIZE)
+		return;
+	/* Page straddling @from will not have any hole block created? */
+	rounded_from = round_up(from, bsize);
+	if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
+		return;
+
+	index = from >> PAGE_CACHE_SHIFT;
+	page = find_lock_page(inode->i_mapping, index);
+	/* Page not cached? Nothing to do */
+	if (!page)
+		return;
+	/*
+	 * See clear_page_dirty_for_io() for details why set_page_dirty()
+	 * is needed.
+	 */
+	if (page_mkclean(page))
+		set_page_dirty(page);
+	unlock_page(page);
+	page_cache_release(page);
+}
+EXPORT_SYMBOL(pagecache_isize_extended);
+
+/**
  * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
  * @inode: inode
  * @lstart: offset of beginning of hole
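For illustration only, not part of this patch: a simplified sketch of the filesystem-side path this helps. With the change, extending the size through truncate_setsize() also write-protects the page straddling the old i_size, so a later mmap store faults into ->page_mkwrite() (the ->setattr below is generic and hypothetical, not any particular filesystem's):

/* Simplified, illustrative ->setattr for a filesystem that relies on
 * truncate_setsize(); error handling is reduced to the essentials.
 */
static int example_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != inode->i_size)
		truncate_setsize(inode, attr->ia_size);

	setattr_copy(inode, attr);
	mark_inode_dirty(inode);
	return 0;
}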