Diffstat (limited to 'mm')
-rw-r--r-- | mm/balloon_compaction.c |   2
-rw-r--r-- | mm/compaction.c         |   3
-rw-r--r-- | mm/huge_memory.c        |  15
-rw-r--r-- | mm/memcontrol.c         | 105
-rw-r--r-- | mm/memory.c             |   1
-rw-r--r-- | mm/memory_hotplug.c     |   5
-rw-r--r-- | mm/mmap.c               |   8
-rw-r--r-- | mm/oom_kill.c           |  17
-rw-r--r-- | mm/page-writeback.c     |  43
-rw-r--r-- | mm/page_alloc.c         |   8
-rw-r--r-- | mm/page_cgroup.c        |   1
-rw-r--r-- | mm/rmap.c               |  88
-rw-r--r-- | mm/shmem.c              |  36
-rw-r--r-- | mm/slab_common.c        |  10
-rw-r--r-- | mm/truncate.c           |  57
15 files changed, 259 insertions, 140 deletions
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index b3cbe19f71b5..fcad8322ef36 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -68,11 +68,13 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
                 * to be released by the balloon driver.
                 */
                if (trylock_page(page)) {
+#ifdef CONFIG_BALLOON_COMPACTION
                        if (!PagePrivate(page)) {
                                /* raced with isolation */
                                unlock_page(page);
                                continue;
                        }
+#endif
                        spin_lock_irqsave(&b_dev_info->pages_lock, flags);
                        balloon_page_delete(page);
                        __count_vm_event(BALLOON_DEFLATE);
diff --git a/mm/compaction.c b/mm/compaction.c
index edba18aed173..ec74cf0123ef 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -784,6 +784,9 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
                        cc->nr_migratepages = 0;
                        break;
                }
+
+               if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
+                       break;
        }
        acct_isolated(cc->zone, cc);
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 74c78aa8bc2f..de984159cf0b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -200,7 +200,7 @@ retry:
        preempt_disable();
        if (cmpxchg(&huge_zero_page, NULL, zero_page)) {
                preempt_enable();
-               __free_page(zero_page);
+               __free_pages(zero_page, compound_order(zero_page));
                goto retry;
        }
 
@@ -232,7 +232,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
        if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
                struct page *zero_page = xchg(&huge_zero_page, NULL);
                BUG_ON(zero_page == NULL);
-               __free_page(zero_page);
+               __free_pages(zero_page, compound_order(zero_page));
                return HPAGE_PMD_NR;
        }
 
@@ -803,7 +803,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                return VM_FAULT_FALLBACK;
        if (unlikely(anon_vma_prepare(vma)))
                return VM_FAULT_OOM;
-       if (unlikely(khugepaged_enter(vma)))
+       if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
                return VM_FAULT_OOM;
        if (!(flags & FAULT_FLAG_WRITE) &&
                        transparent_hugepage_use_zero_page()) {
@@ -1970,7 +1970,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
                 * register it here without waiting a page fault that
                 * may not happen any time soon.
                 */
-               if (unlikely(khugepaged_enter_vma_merge(vma)))
+               if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
                        return -ENOMEM;
                break;
        case MADV_NOHUGEPAGE:
@@ -2071,7 +2071,8 @@ int __khugepaged_enter(struct mm_struct *mm)
        return 0;
 }
 
-int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+                              unsigned long vm_flags)
 {
        unsigned long hstart, hend;
        if (!vma->anon_vma)
@@ -2083,11 +2084,11 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
        if (vma->vm_ops)
                /* khugepaged not yet working on file or special mappings */
                return 0;
-       VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
+       VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
        hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
        hend = vma->vm_end & HPAGE_PMD_MASK;
        if (hstart < hend)
-               return khugepaged_enter(vma);
+               return khugepaged_enter(vma, vm_flags);
        return 0;
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 23976fd885fd..d6ac0e33e150 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1536,12 +1536,8 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
  * start move here.
  */
 
-/* for quick checking without looking up memcg */
-atomic_t memcg_moving __read_mostly;
-
 static void mem_cgroup_start_move(struct mem_cgroup *memcg)
 {
-       atomic_inc(&memcg_moving);
        atomic_inc(&memcg->moving_account);
        synchronize_rcu();
 }
@@ -1552,10 +1548,8 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg)
         * Now, mem_cgroup_clear_mc() may call this function with NULL.
         * We check NULL in callee rather than caller.
         */
-       if (memcg) {
-               atomic_dec(&memcg_moving);
+       if (memcg)
                atomic_dec(&memcg->moving_account);
-       }
 }
 
 /*
@@ -2204,41 +2198,52 @@ cleanup:
        return true;
 }
 
-/*
- * Used to update mapped file or writeback or other statistics.
+/**
+ * mem_cgroup_begin_page_stat - begin a page state statistics transaction
+ * @page: page that is going to change accounted state
+ * @locked: &memcg->move_lock slowpath was taken
+ * @flags: IRQ-state flags for &memcg->move_lock
  *
- * Notes: Race condition
+ * This function must mark the beginning of an accounted page state
+ * change to prevent double accounting when the page is concurrently
+ * being moved to another memcg:
  *
- * Charging occurs during page instantiation, while the page is
- * unmapped and locked in page migration, or while the page table is
- * locked in THP migration.  No race is possible.
+ *   memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
+ *   if (TestClearPageState(page))
+ *     mem_cgroup_update_page_stat(memcg, state, -1);
+ *   mem_cgroup_end_page_stat(memcg, locked, flags);
  *
- * Uncharge happens to pages with zero references, no race possible.
+ * The RCU lock is held throughout the transaction.  The fast path can
+ * get away without acquiring the memcg->move_lock (@locked is false)
+ * because page moving starts with an RCU grace period.
  *
- * Charge moving between groups is protected by checking mm->moving
- * account and taking the move_lock in the slowpath.
+ * The RCU lock also protects the memcg from being freed when the page
+ * state that is going to change is the only thing preventing the page
+ * from being uncharged.  E.g. end-writeback clearing PageWriteback(),
+ * which allows migration to go ahead and uncharge the page before the
+ * account transaction might be complete.
  */
-
-void __mem_cgroup_begin_update_page_stat(struct page *page,
-                               bool *locked, unsigned long *flags)
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
+                                             bool *locked,
+                                             unsigned long *flags)
 {
        struct mem_cgroup *memcg;
        struct page_cgroup *pc;
 
+       rcu_read_lock();
+
+       if (mem_cgroup_disabled())
+               return NULL;
+
        pc = lookup_page_cgroup(page);
 again:
        memcg = pc->mem_cgroup;
        if (unlikely(!memcg || !PageCgroupUsed(pc)))
-               return;
-       /*
-        * If this memory cgroup is not under account moving, we don't
-        * need to take move_lock_mem_cgroup(). Because we already hold
-        * rcu_read_lock(), any calls to move_account will be delayed until
-        * rcu_read_unlock().
-        */
-       VM_BUG_ON(!rcu_read_lock_held());
+               return NULL;
+
+       *locked = false;
        if (atomic_read(&memcg->moving_account) <= 0)
-               return;
+               return memcg;
 
        move_lock_mem_cgroup(memcg, flags);
        if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
@@ -2246,36 +2251,40 @@ again:
                goto again;
        }
        *locked = true;
+
+       return memcg;
 }
 
-void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
+/**
+ * mem_cgroup_end_page_stat - finish a page state statistics transaction
+ * @memcg: the memcg that was accounted against
+ * @locked: value received from mem_cgroup_begin_page_stat()
+ * @flags: value received from mem_cgroup_begin_page_stat()
+ */
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
+                             unsigned long flags)
 {
-       struct page_cgroup *pc = lookup_page_cgroup(page);
+       if (memcg && locked)
+               move_unlock_mem_cgroup(memcg, &flags);
 
-       /*
-        * It's guaranteed that pc->mem_cgroup never changes while
-        * lock is held because a routine modifies pc->mem_cgroup
-        * should take move_lock_mem_cgroup().
-        */
-       move_unlock_mem_cgroup(pc->mem_cgroup, flags);
+       rcu_read_unlock();
 }
 
-void mem_cgroup_update_page_stat(struct page *page,
+/**
+ * mem_cgroup_update_page_stat - update page state statistics
+ * @memcg: memcg to account against
+ * @idx: page state item to account
+ * @val: number of pages (positive or negative)
+ *
+ * See mem_cgroup_begin_page_stat() for locking requirements.
+ */
+void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
                                 enum mem_cgroup_stat_index idx, int val)
 {
-       struct mem_cgroup *memcg;
-       struct page_cgroup *pc = lookup_page_cgroup(page);
-       unsigned long uninitialized_var(flags);
-
-       if (mem_cgroup_disabled())
-               return;
-
        VM_BUG_ON(!rcu_read_lock_held());
-       memcg = pc->mem_cgroup;
-       if (unlikely(!memcg || !PageCgroupUsed(pc)))
-               return;
 
-       this_cpu_add(memcg->stat->count[idx], val);
+       if (memcg)
+               this_cpu_add(memcg->stat->count[idx], val);
 }
 
 /*
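The kernel-doc added above already sketches the calling convention. Purely as an illustration (the wrapper function below is hypothetical; only the mem_cgroup_* calls, the stat item, and the page flag test are taken from this diff), a page-state update under the new API looks roughly like this:

#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/page-flags.h>

/* Hypothetical caller: clear a per-page state bit and keep memcg stats in sync. */
static void example_clear_writeback_stat(struct page *page)
{
        struct mem_cgroup *memcg;
        unsigned long flags;
        bool locked;

        /* Takes rcu_read_lock(); may take memcg->move_lock on the slowpath. */
        memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);

        if (TestClearPageWriteback(page))
                mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);

        /* Drops move_lock if it was taken, then rcu_read_unlock(). */
        mem_cgroup_end_page_stat(memcg, locked, flags);
}

test_clear_page_writeback() and page_remove_file_rmap() later in this diff follow the same shape.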
diff --git a/mm/memory.c b/mm/memory.c
index 1cc6bfbd872e..3e503831e042 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1147,6 +1147,7 @@ again:
                                print_bad_pte(vma, addr, ptent, page);
                        if (unlikely(!__tlb_remove_page(tlb, page))) {
                                force_flush = 1;
+                               addr += PAGE_SIZE;
                                break;
                        }
                        continue;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 29d8693d0c61..252e1dbbed86 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1912,7 +1912,6 @@ void try_offline_node(int nid)
        unsigned long start_pfn = pgdat->node_start_pfn;
        unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
        unsigned long pfn;
-       struct page *pgdat_page = virt_to_page(pgdat);
        int i;
 
        for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
@@ -1941,10 +1940,6 @@ void try_offline_node(int nid)
        node_set_offline(nid);
        unregister_one_node(nid);
 
-       if (!PageSlab(pgdat_page) && !PageCompound(pgdat_page))
-               /* node data is allocated from boot memory */
-               return;
-
        /* free waittable in each zone */
        for (i = 0; i < MAX_NR_ZONES; i++) {
                struct zone *zone = pgdat->node_zones + i;
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1080,7 +1080,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                        end, prev->vm_pgoff, NULL);
                if (err)
                        return NULL;
-               khugepaged_enter_vma_merge(prev);
+               khugepaged_enter_vma_merge(prev, vm_flags);
                return prev;
        }
 
@@ -1099,7 +1099,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                        next->vm_pgoff - pglen, NULL);
                if (err)
                        return NULL;
-               khugepaged_enter_vma_merge(area);
+               khugepaged_enter_vma_merge(area, vm_flags);
                return area;
        }
 
@@ -2208,7 +2208,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                }
        }
        vma_unlock_anon_vma(vma);
-       khugepaged_enter_vma_merge(vma);
+       khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(vma->vm_mm);
        return error;
 }
@@ -2277,7 +2277,7 @@ int expand_downwards(struct vm_area_struct *vma,
                }
        }
        vma_unlock_anon_vma(vma);
-       khugepaged_enter_vma_merge(vma);
+       khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(vma->vm_mm);
        return error;
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index bbf405a3a18f..5340f6b91312 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -404,6 +404,23 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
                dump_tasks(memcg, nodemask);
 }
 
+/*
+ * Number of OOM killer invocations (including memcg OOM killer).
+ * Primarily used by PM freezer to check for potential races with
+ * OOM killed frozen task.
+ */
+static atomic_t oom_kills = ATOMIC_INIT(0);
+
+int oom_kills_count(void)
+{
+       return atomic_read(&oom_kills);
+}
+
+void note_oom_kill(void)
+{
+       atomic_inc(&oom_kills);
+}
+
 #define K(x) ((x) << (PAGE_SHIFT-10))
 /*
  * Must be called while holding a reference to p, which will be released upon
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ff24c9d83112..19ceae87522d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2116,23 +2116,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 EXPORT_SYMBOL(account_page_dirtied);
 
 /*
- * Helper function for set_page_writeback family.
- *
- * The caller must hold mem_cgroup_begin/end_update_page_stat() lock
- * while calling this function.
- * See test_set_page_writeback for example.
- *
- * NOTE: Unlike account_page_dirtied this does not rely on being atomic
- * wrt interrupts.
- */
-void account_page_writeback(struct page *page)
-{
-       mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
-       inc_zone_page_state(page, NR_WRITEBACK);
-}
-EXPORT_SYMBOL(account_page_writeback);
-
-/*
  * For address_spaces which do not use buffers. Just tag the page as dirty in
  * its radix tree.
  *
@@ -2344,11 +2327,12 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
 int test_clear_page_writeback(struct page *page)
 {
        struct address_space *mapping = page_mapping(page);
-       int ret;
-       bool locked;
        unsigned long memcg_flags;
+       struct mem_cgroup *memcg;
+       bool locked;
+       int ret;
 
-       mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
+       memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
        if (mapping) {
                struct backing_dev_info *bdi = mapping->backing_dev_info;
                unsigned long flags;
@@ -2369,22 +2353,23 @@ int test_clear_page_writeback(struct page *page)
                ret = TestClearPageWriteback(page);
        }
        if (ret) {
-               mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
+               mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
                dec_zone_page_state(page, NR_WRITEBACK);
                inc_zone_page_state(page, NR_WRITTEN);
        }
-       mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
+       mem_cgroup_end_page_stat(memcg, locked, memcg_flags);
        return ret;
 }
 
 int __test_set_page_writeback(struct page *page, bool keep_write)
 {
        struct address_space *mapping = page_mapping(page);
-       int ret;
-       bool locked;
        unsigned long memcg_flags;
+       struct mem_cgroup *memcg;
+       bool locked;
+       int ret;
 
-       mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
+       memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
        if (mapping) {
                struct backing_dev_info *bdi = mapping->backing_dev_info;
                unsigned long flags;
@@ -2410,9 +2395,11 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
        } else {
                ret = TestSetPageWriteback(page);
        }
-       if (!ret)
-               account_page_writeback(page);
-       mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
+       if (!ret) {
+               mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
+               inc_zone_page_state(page, NR_WRITEBACK);
+       }
+       mem_cgroup_end_page_stat(memcg, locked, memcg_flags);
        return ret;
 
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 736d8e1b6381..9cd36b822444 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2252,6 +2252,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
        }
 
        /*
+        * PM-freezer should be notified that there might be an OOM killer on
+        * its way to kill and wake somebody up. This is too early and we might
+        * end up not killing anything but false positives are acceptable.
+        * See freeze_processes.
+        */
+       note_oom_kill();
+
+       /*
         * Go through the zonelist yet one more time, keep very high watermark
         * here, this is only to catch a parallel oom killing, we must fail if
         * we're still under heavy pressure.
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 3708264d2833..5331c2bd85a2 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -171,6 +171,7 @@ static void free_page_cgroup(void *addr)
                        sizeof(struct page_cgroup) * PAGES_PER_SECTION;
 
                BUG_ON(PageReserved(page));
+               kmemleak_free(addr);
                free_pages_exact(addr, table_size);
        }
 }
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1042,15 +1042,46 @@ void page_add_new_anon_rmap(struct page *page,
  */
 void page_add_file_rmap(struct page *page)
 {
-       bool locked;
+       struct mem_cgroup *memcg;
        unsigned long flags;
+       bool locked;
 
-       mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+       memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
        if (atomic_inc_and_test(&page->_mapcount)) {
                __inc_zone_page_state(page, NR_FILE_MAPPED);
-               mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+               mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
        }
-       mem_cgroup_end_update_page_stat(page, &locked, &flags);
+       mem_cgroup_end_page_stat(memcg, locked, flags);
+}
+
+static void page_remove_file_rmap(struct page *page)
+{
+       struct mem_cgroup *memcg;
+       unsigned long flags;
+       bool locked;
+
+       memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
+
+       /* page still mapped by someone else? */
+       if (!atomic_add_negative(-1, &page->_mapcount))
+               goto out;
+
+       /* Hugepages are not counted in NR_FILE_MAPPED for now. */
+       if (unlikely(PageHuge(page)))
+               goto out;
+
+       /*
+        * We use the irq-unsafe __{inc|mod}_zone_page_stat because
+        * these counters are not modified in interrupt context, and
+        * pte lock(a spinlock) is held, which implies preemption disabled.
+        */
+       __dec_zone_page_state(page, NR_FILE_MAPPED);
+       mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
+
+       if (unlikely(PageMlocked(page)))
+               clear_page_mlock(page);
+out:
+       mem_cgroup_end_page_stat(memcg, locked, flags);
 }
 
 /**
@@ -1061,46 +1092,33 @@ void page_add_file_rmap(struct page *page)
  */
 void page_remove_rmap(struct page *page)
 {
-       bool anon = PageAnon(page);
-       bool locked;
-       unsigned long flags;
-
-       /*
-        * The anon case has no mem_cgroup page_stat to update; but may
-        * uncharge_page() below, where the lock ordering can deadlock if
-        * we hold the lock against page_stat move: so avoid it on anon.
-        */
-       if (!anon)
-               mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+       if (!PageAnon(page)) {
+               page_remove_file_rmap(page);
+               return;
+       }
 
        /* page still mapped by someone else? */
        if (!atomic_add_negative(-1, &page->_mapcount))
-               goto out;
+               return;
+
+       /* Hugepages are not counted in NR_ANON_PAGES for now. */
+       if (unlikely(PageHuge(page)))
+               return;
 
        /*
-        * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
-        * and not charged by memcg for now.
-        *
         * We use the irq-unsafe __{inc|mod}_zone_page_stat because
         * these counters are not modified in interrupt context, and
-        * these counters are not modified in interrupt context, and
         * pte lock(a spinlock) is held, which implies preemption disabled.
         */
-       if (unlikely(PageHuge(page)))
-               goto out;
-       if (anon) {
-               if (PageTransHuge(page))
-                       __dec_zone_page_state(page,
-                                             NR_ANON_TRANSPARENT_HUGEPAGES);
-               __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                                     -hpage_nr_pages(page));
-       } else {
-               __dec_zone_page_state(page, NR_FILE_MAPPED);
-               mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
-               mem_cgroup_end_update_page_stat(page, &locked, &flags);
-       }
+       if (PageTransHuge(page))
+               __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+
+       __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
+                             -hpage_nr_pages(page));
+
        if (unlikely(PageMlocked(page)))
                clear_page_mlock(page);
+
        /*
         * It would be tidy to reset the PageAnon mapping here,
         * but that might overwrite a racing page_add_anon_rmap
@@ -1110,10 +1128,6 @@ void page_remove_rmap(struct page *page)
         * Leaving it set also helps swapoff to reinstate ptes
         * faster for those pages still in swapcache.
         */
-       return;
-out:
-       if (!anon)
-               mem_cgroup_end_update_page_stat(page, &locked, &flags);
 }
 
 /*
diff --git a/mm/shmem.c b/mm/shmem.c
index cd6fc7590e54..185836ba53ef 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2345,6 +2345,32 @@ static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, stru
        return 0;
 }
 
+static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
+{
+       struct dentry *whiteout;
+       int error;
+
+       whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
+       if (!whiteout)
+               return -ENOMEM;
+
+       error = shmem_mknod(old_dir, whiteout,
+                           S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+       dput(whiteout);
+       if (error)
+               return error;
+
+       /*
+        * Cheat and hash the whiteout while the old dentry is still in
+        * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
+        *
+        * d_lookup() will consistently find one of them at this point,
+        * not sure which one, but that isn't even important.
+        */
+       d_rehash(whiteout);
+       return 0;
+}
+
 /*
  * The VFS layer already does all the dentry stuff for rename,
  * we just have to decrement the usage count for the target if
@@ -2356,7 +2382,7 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
        struct inode *inode = old_dentry->d_inode;
        int they_are_dirs = S_ISDIR(inode->i_mode);
 
-       if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+       if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
                return -EINVAL;
 
        if (flags & RENAME_EXCHANGE)
@@ -2365,6 +2391,14 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
        if (!simple_empty(new_dentry))
                return -ENOTEMPTY;
 
+       if (flags & RENAME_WHITEOUT) {
+               int error;
+
+               error = shmem_whiteout(old_dir, old_dentry);
+               if (error)
+                       return error;
+       }
+
        if (new_dentry->d_inode) {
                (void) shmem_unlink(new_dir, new_dentry);
                if (they_are_dirs) {
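RENAME_WHITEOUT is requested from user space through renameat2(). A hedged user-space sketch for exercising the new tmpfs support follows; the raw syscall() is used on the assumption that the libc does not yet wrap renameat2(), and the fallback define mirrors include/uapi/linux/fs.h:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef RENAME_WHITEOUT
#define RENAME_WHITEOUT (1 << 2)        /* whiteout source, per uapi/linux/fs.h */
#endif

int main(int argc, char **argv)
{
        if (argc != 3) {
                fprintf(stderr, "usage: %s <old> <new>\n", argv[0]);
                return 1;
        }

        /* Rename old -> new on a tmpfs mount and leave a whiteout at the old name. */
        if (syscall(SYS_renameat2, AT_FDCWD, argv[1],
                    AT_FDCWD, argv[2], RENAME_WHITEOUT) < 0) {
                perror("renameat2(RENAME_WHITEOUT)");
                return 1;
        }
        return 0;
}

On tmpfs the old name is left behind as a 0/0 character device node, the whiteout that overlayfs relies on in its upper layer.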
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3a6e0cfdf03a..406944207b61 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -93,16 +93,6 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
                               s->object_size);
                        continue;
                }
-
-#if !defined(CONFIG_SLUB)
-               if (!strcmp(s->name, name)) {
-                       pr_err("%s (%s): Cache name already exists.\n",
-                              __func__, name);
-                       dump_stack();
-                       s = NULL;
-                       return -EINVAL;
-               }
-#endif
        }
 
        WARN_ON(strchr(name, ' '));     /* It confuses parsers */
diff --git a/mm/truncate.c b/mm/truncate.c
index 96d167372d89..261eaf6e5a19 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
 #include <linux/buffer_head.h> /* grr. try_to_release_page,
                                   do_invalidatepage */
 #include <linux/cleancache.h>
+#include <linux/rmap.h>
 #include "internal.h"
 
 static void clear_exceptional_entry(struct address_space *mapping,
@@ -719,12 +720,68 @@ EXPORT_SYMBOL(truncate_pagecache);
  */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
+       loff_t oldsize = inode->i_size;
+
        i_size_write(inode, newsize);
+       if (newsize > oldsize)
+               pagecache_isize_extended(inode, oldsize, newsize);
        truncate_pagecache(inode, newsize);
 }
 EXPORT_SYMBOL(truncate_setsize);
 
 /**
+ * pagecache_isize_extended - update pagecache after extension of i_size
+ * @inode: inode for which i_size was extended
+ * @from: original inode size
+ * @to: new inode size
+ *
+ * Handle extension of inode size either caused by extending truncate or by
+ * write starting after current i_size. We mark the page straddling current
+ * i_size RO so that page_mkwrite() is called on the nearest write access to
+ * the page. This way filesystem can be sure that page_mkwrite() is called on
+ * the page before user writes to the page via mmap after the i_size has been
+ * changed.
+ *
+ * The function must be called after i_size is updated so that page fault
+ * coming after we unlock the page will already see the new i_size.
+ * The function must be called while we still hold i_mutex - this not only
+ * makes sure i_size is stable but also that userspace cannot observe new
+ * i_size value before we are prepared to store mmap writes at new inode size.
+ */
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
+{
+       int bsize = 1 << inode->i_blkbits;
+       loff_t rounded_from;
+       struct page *page;
+       pgoff_t index;
+
+       WARN_ON(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON(to > inode->i_size);
+
+       if (from >= to || bsize == PAGE_CACHE_SIZE)
+               return;
+       /* Page straddling @from will not have any hole block created? */
+       rounded_from = round_up(from, bsize);
+       if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
+               return;
+
+       index = from >> PAGE_CACHE_SHIFT;
+       page = find_lock_page(inode->i_mapping, index);
+       /* Page not cached? Nothing to do */
+       if (!page)
+               return;
+       /*
+        * See clear_page_dirty_for_io() for details why set_page_dirty()
+        * is needed.
+        */
+       if (page_mkclean(page))
+               set_page_dirty(page);
+       unlock_page(page);
+       page_cache_release(page);
+}
+EXPORT_SYMBOL(pagecache_isize_extended);
+
+/**
  * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
  * @inode: inode
  * @lstart: offset of beginning of hole
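The kernel-doc above fixes the calling rules: i_mutex held and i_size already published. As a hedged sketch of a filesystem-side caller (the function is illustrative, not taken from any particular filesystem; extending writes that update i_size outside truncate_setsize() would make a similar call):

#include <linux/fs.h>
#include <linux/mm.h>

/* Illustrative inode size change for a simple filesystem's setattr path. */
static int example_setsize(struct inode *inode, loff_t newsize)
{
        loff_t oldsize = i_size_read(inode);

        WARN_ON(!mutex_is_locked(&inode->i_mutex));

        /* Publish the new size first, so faults after unlock already see it... */
        i_size_write(inode, newsize);

        /*
         * ...then write-protect the page straddling the old size, so mmap
         * stores into the newly exposed tail go through page_mkwrite().
         */
        if (newsize > oldsize)
                pagecache_isize_extended(inode, oldsize, newsize);
        else
                truncate_pagecache(inode, newsize);

        return 0;
}

This mirrors what the updated truncate_setsize() above now does for extending truncates.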