author    Dennis Zhou (Facebook) <dennisszhou@gmail.com>    2018-08-22 00:53:58 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2018-08-22 13:52:45 -0400
commit    7e8a6304d5419cbf056a59de92939e5eef039c57 (patch)
tree      6fa617d1ee50064ee3b93f1f95740d54ed883f18 /mm/percpu.c
parent    3d8b38eb81cac81395f6a823f6bf401b327268e6 (diff)
/proc/meminfo: add percpu populated pages count
Currently, percpu memory only exposes allocation and utilization information
via debugfs.  This more or less is only really useful for understanding the
fragmentation and allocation information at a per-chunk level with a few
global counters.  This is also gated behind a config.  BPF and cgroup, for
example, have seen an increase in use causing increased use of percpu memory.
Let's make it easier for someone to identify how much memory is being used.

This patch adds the "Percpu" stat to meminfo to more easily look up how much
percpu memory is in use.  This number includes the cost for all allocated
backing pages and not just insight at the per-unit, per-chunk level.
Metadata is excluded.  I think excluding metadata is fair because the backing
memory scales with the number of cpus and can quickly outweigh the metadata.
It also makes this calculation light.

Link: http://lkml.kernel.org/r/20180807184723.74919-1-dennisszhou@gmail.com
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
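The diffstat below is limited to mm/percpu.c, so the /proc/meminfo consumer of
the new counter is not shown on this page.  As a rough sketch of what that side
of the patch plausibly looks like (the exact hunks are not reproduced here),
the total is exported through the new pcpu_nr_pages() accessor and printed with
the existing show_val_kb() helper:

    /* Sketch only: the consumer-side hunks are outside this mm/percpu.c diffstat. */

    /* include/linux/percpu.h: expose the accessor to the rest of the kernel */
    extern unsigned long pcpu_nr_pages(void);

    /* fs/proc/meminfo.c, in meminfo_proc_show(): print the page count in kB */
    show_val_kb(m, "Percpu:         ", pcpu_nr_pages());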
Diffstat (limited to 'mm/percpu.c')
-rw-r--r--  mm/percpu.c  29
1 file changed, 29 insertions, 0 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index 0b6480979ac7..a749d4d96e3e 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -170,6 +170,14 @@ static LIST_HEAD(pcpu_map_extend_chunks);
 int pcpu_nr_empty_pop_pages;
 
 /*
+ * The number of populated pages in use by the allocator, protected by
+ * pcpu_lock.  This number is kept per a unit per chunk (i.e. when a page gets
+ * allocated/deallocated, it is allocated/deallocated in all units of a chunk
+ * and increments/decrements this count by 1).
+ */
+static unsigned long pcpu_nr_populated;
+
+/*
  * Balance work is used to populate or destroy chunks asynchronously.  We
  * try to keep the number of populated free pages between
  * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and at most one
@@ -1232,6 +1240,7 @@ static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
 
 	bitmap_set(chunk->populated, page_start, nr);
 	chunk->nr_populated += nr;
+	pcpu_nr_populated += nr;
 
 	if (!for_alloc) {
 		chunk->nr_empty_pop_pages += nr;
@@ -1260,6 +1269,7 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
 	chunk->nr_populated -= nr;
 	chunk->nr_empty_pop_pages -= nr;
 	pcpu_nr_empty_pop_pages -= nr;
+	pcpu_nr_populated -= nr;
 }
 
 /*
@@ -2176,6 +2186,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
 	pcpu_chunk_relocate(pcpu_first_chunk, -1);
 
+	/* include all regions of the first chunk */
+	pcpu_nr_populated += PFN_DOWN(size_sum);
+
 	pcpu_stats_chunk_alloc();
 	trace_percpu_create_chunk(base_addr);
 
@@ -2746,6 +2759,22 @@ void __init setup_per_cpu_areas(void)
 #endif	/* CONFIG_SMP */
 
 /*
+ * pcpu_nr_pages - calculate total number of populated backing pages
+ *
+ * This reflects the number of pages populated to back chunks.  Metadata is
+ * excluded in the number exposed in meminfo as the number of backing pages
+ * scales with the number of cpus and can quickly outweigh the memory used for
+ * metadata.  It also keeps this calculation nice and simple.
+ *
+ * RETURNS:
+ * Total number of populated backing pages in use by the allocator.
+ */
+unsigned long pcpu_nr_pages(void)
+{
+	return pcpu_nr_populated * pcpu_nr_units;
+}
+
+/*
  * Percpu allocator is initialized early during boot when neither slab or
  * workqueue is available.  Plug async management until everything is up
  * and running.
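
With this patch applied, the total shows up in /proc/meminfo as a "Percpu:"
line in kB, i.e. pcpu_nr_pages() (populated pages per unit times pcpu_nr_units)
scaled by the page size.  A minimal userspace sketch that pulls out that line
(illustrative only, not part of this patch):

    #include <stdio.h>
    #include <string.h>

    /* Read /proc/meminfo and print the new "Percpu:" field added by this patch. */
    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/meminfo", "r");

            if (!f) {
                    perror("fopen");
                    return 1;
            }

            while (fgets(line, sizeof(line), f)) {
                    if (!strncmp(line, "Percpu:", 7)) {
                            fputs(line, stdout);    /* e.g. "Percpu:  123456 kB" */
                            break;
                    }
            }

            fclose(f);
            return 0;
    }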