about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/sysctl/vm.txt 12
-rw-r--r-- arch/x86/mm/pgtable.c 14
-rw-r--r-- fs/proc/task_mmu.c 9
-rw-r--r-- include/linux/mm.h 24
-rw-r--r-- include/linux/mm_types.h 3
-rw-r--r-- kernel/fork.c 3
-rw-r--r-- mm/debug.c 3
-rw-r--r-- mm/hugetlb.c 8
-rw-r--r-- mm/memory.c 15
-rw-r--r-- mm/mmap.c 4
-rw-r--r-- mm/oom_kill.c 9
11 files changed, 75 insertions, 29 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 4415aa915681..e9c706e4627a 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -555,12 +555,12 @@ this is causing problems for your system/application.
555 555
556oom_dump_tasks 556oom_dump_tasks
557 557
558Enables a system-wide task dump (excluding kernel threads) to be 558Enables a system-wide task dump (excluding kernel threads) to be produced
559produced when the kernel performs an OOM-killing and includes such 559when the kernel performs an OOM-killing and includes such information as
560information as pid, uid, tgid, vm size, rss, nr_ptes, swapents, 560pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, swapents, oom_score_adj
561oom_score_adj score, and name. This is helpful to determine why the 561score, and name. This is helpful to determine why the OOM killer was
562OOM killer was invoked, to identify the rogue task that caused it, 562invoked, to identify the rogue task that caused it, and to determine why
563and to determine why the OOM killer chose the task it did to kill. 563the OOM killer chose the task it did to kill.
564 564
565If this is set to zero, this information is suppressed. On very 565If this is set to zero, this information is suppressed. On very
566large systems with thousands of tasks it may not be feasible to dump 566large systems with thousands of tasks it may not be feasible to dump
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 6fb6927f9e76..7b22adaad4f1 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -190,7 +190,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
190 190
191#endif /* CONFIG_X86_PAE */ 191#endif /* CONFIG_X86_PAE */
192 192
193static void free_pmds(pmd_t *pmds[]) 193static void free_pmds(struct mm_struct *mm, pmd_t *pmds[])
194{ 194{
195 int i; 195 int i;
196 196
@@ -198,10 +198,11 @@ static void free_pmds(pmd_t *pmds[])
198 if (pmds[i]) { 198 if (pmds[i]) {
199 pgtable_pmd_page_dtor(virt_to_page(pmds[i])); 199 pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
200 free_page((unsigned long)pmds[i]); 200 free_page((unsigned long)pmds[i]);
201 mm_dec_nr_pmds(mm);
201 } 202 }
202} 203}
203 204
204static int preallocate_pmds(pmd_t *pmds[]) 205static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
205{ 206{
206 int i; 207 int i;
207 bool failed = false; 208 bool failed = false;
@@ -215,11 +216,13 @@ static int preallocate_pmds(pmd_t *pmds[])
215 pmd = NULL; 216 pmd = NULL;
216 failed = true; 217 failed = true;
217 } 218 }
219 if (pmd)
220 mm_inc_nr_pmds(mm);
218 pmds[i] = pmd; 221 pmds[i] = pmd;
219 } 222 }
220 223
221 if (failed) { 224 if (failed) {
222 free_pmds(pmds); 225 free_pmds(mm, pmds);
223 return -ENOMEM; 226 return -ENOMEM;
224 } 227 }
225 228
@@ -246,6 +249,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
246 249
247 paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); 250 paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
248 pmd_free(mm, pmd); 251 pmd_free(mm, pmd);
252 mm_dec_nr_pmds(mm);
249 } 253 }
250 } 254 }
251} 255}
@@ -283,7 +287,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
283 287
284 mm->pgd = pgd; 288 mm->pgd = pgd;
285 289
286 if (preallocate_pmds(pmds) != 0) 290 if (preallocate_pmds(mm, pmds) != 0)
287 goto out_free_pgd; 291 goto out_free_pgd;
288 292
289 if (paravirt_pgd_alloc(mm) != 0) 293 if (paravirt_pgd_alloc(mm) != 0)
@@ -304,7 +308,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
304 return pgd; 308 return pgd;
305 309
306out_free_pmds: 310out_free_pmds:
307 free_pmds(pmds); 311 free_pmds(mm, pmds);
308out_free_pgd: 312out_free_pgd:
309 free_page((unsigned long)pgd); 313 free_page((unsigned long)pgd);
310out: 314out:
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6396f88c6687..e6e0abeb5d12 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -21,7 +21,7 @@
21 21
22void task_mem(struct seq_file *m, struct mm_struct *mm) 22void task_mem(struct seq_file *m, struct mm_struct *mm)
23{ 23{
24 unsigned long data, text, lib, swap; 24 unsigned long data, text, lib, swap, ptes, pmds;
25 unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; 25 unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
26 26
27 /* 27 /*
@@ -42,6 +42,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
42 text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; 42 text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
43 lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; 43 lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
44 swap = get_mm_counter(mm, MM_SWAPENTS); 44 swap = get_mm_counter(mm, MM_SWAPENTS);
45 ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
46 pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
45 seq_printf(m, 47 seq_printf(m,
46 "VmPeak:\t%8lu kB\n" 48 "VmPeak:\t%8lu kB\n"
47 "VmSize:\t%8lu kB\n" 49 "VmSize:\t%8lu kB\n"
@@ -54,6 +56,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
54 "VmExe:\t%8lu kB\n" 56 "VmExe:\t%8lu kB\n"
55 "VmLib:\t%8lu kB\n" 57 "VmLib:\t%8lu kB\n"
56 "VmPTE:\t%8lu kB\n" 58 "VmPTE:\t%8lu kB\n"
59 "VmPMD:\t%8lu kB\n"
57 "VmSwap:\t%8lu kB\n", 60 "VmSwap:\t%8lu kB\n",
58 hiwater_vm << (PAGE_SHIFT-10), 61 hiwater_vm << (PAGE_SHIFT-10),
59 total_vm << (PAGE_SHIFT-10), 62 total_vm << (PAGE_SHIFT-10),
@@ -63,8 +66,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
63 total_rss << (PAGE_SHIFT-10), 66 total_rss << (PAGE_SHIFT-10),
64 data << (PAGE_SHIFT-10), 67 data << (PAGE_SHIFT-10),
65 mm->stack_vm << (PAGE_SHIFT-10), text, lib, 68 mm->stack_vm << (PAGE_SHIFT-10), text, lib,
66 (PTRS_PER_PTE * sizeof(pte_t) * 69 ptes >> 10,
67 atomic_long_read(&mm->nr_ptes)) >> 10, 70 pmds >> 10,
68 swap << (PAGE_SHIFT-10)); 71 swap << (PAGE_SHIFT-10));
69} 72}
70 73
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c6bf813a6b3d..644990b83cda 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1438,8 +1438,32 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
1438{ 1438{
1439 return 0; 1439 return 0;
1440} 1440}
1441
1442static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
1443{
1444 return 0;
1445}
1446
1447static inline void mm_inc_nr_pmds(struct mm_struct *mm) {}
1448static inline void mm_dec_nr_pmds(struct mm_struct *mm) {}
1449
1441#else 1450#else
1442int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); 1451int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
1452
1453static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
1454{
1455 return atomic_long_read(&mm->nr_pmds);
1456}
1457
1458static inline void mm_inc_nr_pmds(struct mm_struct *mm)
1459{
1460 atomic_long_inc(&mm->nr_pmds);
1461}
1462
1463static inline void mm_dec_nr_pmds(struct mm_struct *mm)
1464{
1465 atomic_long_dec(&mm->nr_pmds);
1466}
1443#endif 1467#endif
1444 1468
1445int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 1469int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 20ff2105b564..199a03aab8dc 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -363,7 +363,8 @@ struct mm_struct {
363 pgd_t * pgd; 363 pgd_t * pgd;
364 atomic_t mm_users; /* How many users with user space? */ 364 atomic_t mm_users; /* How many users with user space? */
365 atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ 365 atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
366 atomic_long_t nr_ptes; /* Page table pages */ 366 atomic_long_t nr_ptes; /* PTE page table pages */
367 atomic_long_t nr_pmds; /* PMD page table pages */
367 int map_count; /* number of VMAs */ 368 int map_count; /* number of VMAs */
368 369
369 spinlock_t page_table_lock; /* Protects page tables and some counters */ 370 spinlock_t page_table_lock; /* Protects page tables and some counters */
diff --git a/kernel/fork.c b/kernel/fork.c
index b379d9abddc7..c99098c52641 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -555,6 +555,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
555 INIT_LIST_HEAD(&mm->mmlist); 555 INIT_LIST_HEAD(&mm->mmlist);
556 mm->core_state = NULL; 556 mm->core_state = NULL;
557 atomic_long_set(&mm->nr_ptes, 0); 557 atomic_long_set(&mm->nr_ptes, 0);
558#ifndef __PAGETABLE_PMD_FOLDED
559 atomic_long_set(&mm->nr_pmds, 0);
560#endif
558 mm->map_count = 0; 561 mm->map_count = 0;
559 mm->locked_vm = 0; 562 mm->locked_vm = 0;
560 mm->pinned_vm = 0; 563 mm->pinned_vm = 0;
diff --git a/mm/debug.c b/mm/debug.c
index d69cb5a7ba9a..3eb3ac2fcee7 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -173,7 +173,7 @@ void dump_mm(const struct mm_struct *mm)
173 "get_unmapped_area %p\n" 173 "get_unmapped_area %p\n"
174#endif 174#endif
175 "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" 175 "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
176 "pgd %p mm_users %d mm_count %d nr_ptes %lu map_count %d\n" 176 "pgd %p mm_users %d mm_count %d nr_ptes %lu nr_pmds %lu map_count %d\n"
177 "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" 177 "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
178 "pinned_vm %lx shared_vm %lx exec_vm %lx stack_vm %lx\n" 178 "pinned_vm %lx shared_vm %lx exec_vm %lx stack_vm %lx\n"
179 "start_code %lx end_code %lx start_data %lx end_data %lx\n" 179 "start_code %lx end_code %lx start_data %lx end_data %lx\n"
@@ -206,6 +206,7 @@ void dump_mm(const struct mm_struct *mm)
206 mm->pgd, atomic_read(&mm->mm_users), 206 mm->pgd, atomic_read(&mm->mm_users),
207 atomic_read(&mm->mm_count), 207 atomic_read(&mm->mm_count),
208 atomic_long_read((atomic_long_t *)&mm->nr_ptes), 208 atomic_long_read((atomic_long_t *)&mm->nr_ptes),
209 mm_nr_pmds((struct mm_struct *)mm),
209 mm->map_count, 210 mm->map_count,
210 mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm, 211 mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
211 mm->pinned_vm, mm->shared_vm, mm->exec_vm, mm->stack_vm, 212 mm->pinned_vm, mm->shared_vm, mm->exec_vm, mm->stack_vm,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fd28d6ba5e5d..0a9ac6c26832 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3598,6 +3598,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
3598 if (saddr) { 3598 if (saddr) {
3599 spte = huge_pte_offset(svma->vm_mm, saddr); 3599 spte = huge_pte_offset(svma->vm_mm, saddr);
3600 if (spte) { 3600 if (spte) {
3601 mm_inc_nr_pmds(mm);
3601 get_page(virt_to_page(spte)); 3602 get_page(virt_to_page(spte));
3602 break; 3603 break;
3603 } 3604 }
@@ -3609,11 +3610,13 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
3609 3610
3610 ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte); 3611 ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte);
3611 spin_lock(ptl); 3612 spin_lock(ptl);
3612 if (pud_none(*pud)) 3613 if (pud_none(*pud)) {
3613 pud_populate(mm, pud, 3614 pud_populate(mm, pud,
3614 (pmd_t *)((unsigned long)spte & PAGE_MASK)); 3615 (pmd_t *)((unsigned long)spte & PAGE_MASK));
3615 else 3616 } else {
3616 put_page(virt_to_page(spte)); 3617 put_page(virt_to_page(spte));
3618 mm_inc_nr_pmds(mm);
3619 }
3617 spin_unlock(ptl); 3620 spin_unlock(ptl);
3618out: 3621out:
3619 pte = (pte_t *)pmd_alloc(mm, pud, addr); 3622 pte = (pte_t *)pmd_alloc(mm, pud, addr);
@@ -3644,6 +3647,7 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
3644 3647
3645 pud_clear(pud); 3648 pud_clear(pud);
3646 put_page(virt_to_page(ptep)); 3649 put_page(virt_to_page(ptep));
3650 mm_dec_nr_pmds(mm);
3647 *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; 3651 *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
3648 return 1; 3652 return 1;
3649} 3653}
diff --git a/mm/memory.c b/mm/memory.c
index d63849b5188f..bbe6a73a899d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -428,6 +428,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
428 pmd = pmd_offset(pud, start); 428 pmd = pmd_offset(pud, start);
429 pud_clear(pud); 429 pud_clear(pud);
430 pmd_free_tlb(tlb, pmd, start); 430 pmd_free_tlb(tlb, pmd, start);
431 mm_dec_nr_pmds(tlb->mm);
431} 432}
432 433
433static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, 434static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -3322,15 +3323,17 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
3322 3323
3323 spin_lock(&mm->page_table_lock); 3324 spin_lock(&mm->page_table_lock);
3324#ifndef __ARCH_HAS_4LEVEL_HACK 3325#ifndef __ARCH_HAS_4LEVEL_HACK
3325 if (pud_present(*pud)) /* Another has populated it */ 3326 if (!pud_present(*pud)) {
3326 pmd_free(mm, new); 3327 mm_inc_nr_pmds(mm);
3327 else
3328 pud_populate(mm, pud, new); 3328 pud_populate(mm, pud, new);
3329#else 3329 } else /* Another has populated it */
3330 if (pgd_present(*pud)) /* Another has populated it */
3331 pmd_free(mm, new); 3330 pmd_free(mm, new);
3332 else 3331#else
3332 if (!pgd_present(*pud)) {
3333 mm_inc_nr_pmds(mm);
3333 pgd_populate(mm, pud, new); 3334 pgd_populate(mm, pud, new);
3335 } else /* Another has populated it */
3336 pmd_free(mm, new);
3334#endif /* __ARCH_HAS_4LEVEL_HACK */ 3337#endif /* __ARCH_HAS_4LEVEL_HACK */
3335 spin_unlock(&mm->page_table_lock); 3338 spin_unlock(&mm->page_table_lock);
3336 return 0; 3339 return 0;
diff --git a/mm/mmap.c b/mm/mmap.c
index 14d84666e8ba..6a7d36d133fb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2853,7 +2853,9 @@ void exit_mmap(struct mm_struct *mm)
2853 vm_unacct_memory(nr_accounted); 2853 vm_unacct_memory(nr_accounted);
2854 2854
2855 WARN_ON(atomic_long_read(&mm->nr_ptes) > 2855 WARN_ON(atomic_long_read(&mm->nr_ptes) >
2856 (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); 2856 round_up(FIRST_USER_ADDRESS, PMD_SIZE) >> PMD_SHIFT);
2857 WARN_ON(mm_nr_pmds(mm) >
2858 round_up(FIRST_USER_ADDRESS, PUD_SIZE) >> PUD_SHIFT);
2857} 2859}
2858 2860
2859/* Insert vm structure into process list sorted by address 2861/* Insert vm structure into process list sorted by address
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b8df76ee2be3..642f38cb175a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -169,8 +169,8 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
169 * The baseline for the badness score is the proportion of RAM that each 169 * The baseline for the badness score is the proportion of RAM that each
170 * task's rss, pagetable and swap space use. 170 * task's rss, pagetable and swap space use.
171 */ 171 */
172 points = get_mm_rss(p->mm) + atomic_long_read(&p->mm->nr_ptes) + 172 points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
173 get_mm_counter(p->mm, MM_SWAPENTS); 173 atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm);
174 task_unlock(p); 174 task_unlock(p);
175 175
176 /* 176 /*
@@ -351,7 +351,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
351 struct task_struct *p; 351 struct task_struct *p;
352 struct task_struct *task; 352 struct task_struct *task;
353 353
354 pr_info("[ pid ] uid tgid total_vm rss nr_ptes swapents oom_score_adj name\n"); 354 pr_info("[ pid ] uid tgid total_vm rss nr_ptes nr_pmds swapents oom_score_adj name\n");
355 rcu_read_lock(); 355 rcu_read_lock();
356 for_each_process(p) { 356 for_each_process(p) {
357 if (oom_unkillable_task(p, memcg, nodemask)) 357 if (oom_unkillable_task(p, memcg, nodemask))
@@ -367,10 +367,11 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
367 continue; 367 continue;
368 } 368 }
369 369
370 pr_info("[%5d] %5d %5d %8lu %8lu %7ld %8lu %5hd %s\n", 370 pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %8lu %5hd %s\n",
371 task->pid, from_kuid(&init_user_ns, task_uid(task)), 371 task->pid, from_kuid(&init_user_ns, task_uid(task)),
372 task->tgid, task->mm->total_vm, get_mm_rss(task->mm), 372 task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
373 atomic_long_read(&task->mm->nr_ptes), 373 atomic_long_read(&task->mm->nr_ptes),
374 mm_nr_pmds(task->mm),
374 get_mm_counter(task->mm, MM_SWAPENTS), 375 get_mm_counter(task->mm, MM_SWAPENTS),
375 task->signal->oom_score_adj, task->comm); 376 task->signal->oom_score_adj, task->comm);
376 task_unlock(task); 377 task_unlock(task);