aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
author: Glauber Costa <glommer@parallels.com> 2013-02-22 19:34:49 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-02-23 20:50:18 -0500
commit: 45cf7ebd5a03317eb825c9dcb8599750d8b16145 (patch)
tree: 3331759c81f08f2df0e5524b473bea282329dd6e /mm/memcontrol.c
parent: a4e1b4c6c6db7b7d1ca7f399b4e08aa381f23899 (diff)
memcg: reduce the size of struct memcg 244-fold.
In order to maintain all the memcg bookkeeping, we need per-node descriptors, which will in turn contain a per-zone descriptor. Because we want to statically allocate those, this array ends up being very big. Part of the reason is that we allocate something large enough to hold MAX_NUMNODES entries, where MAX_NUMNODES is the compile-time constant that holds the maximum number of nodes we would ever consider. However, we can do better in some cases if the firmware helps us. This is true for modern x86 machines; coincidentally one of the architectures in which MAX_NUMNODES tends to be very big. By using the firmware-provided maximum number of nodes instead of MAX_NUMNODES, we can reduce the memory footprint of struct memcg considerably. In the extreme case in which we have only one node, this reduces the size of the structure from ~64k to ~2k. This is particularly important because it means that we will no longer resort to the vmalloc area for the struct memcg on defconfigs. We also have enough room for an extra node while still staying outside vmalloc. One also has to keep in mind that with the industry's ability to fit more processors in a die as fast as the Fed prints money, a nodes = 2 configuration is already respectably big. [akpm@linux-foundation.org: add check for invalid nid, remove inline] Signed-off-by: Glauber Costa <glommer@parallels.com> Acked-by: Michal Hocko <mhocko@suse.cz> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Greg Thelen <gthelen@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Ying Han <yinghan@google.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c 41
1 files changed, 26 insertions, 15 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 972f822b142b..ae0433885b69 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -180,7 +180,7 @@ struct mem_cgroup_per_node {
180}; 180};
181 181
182struct mem_cgroup_lru_info { 182struct mem_cgroup_lru_info {
183 struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES]; 183 struct mem_cgroup_per_node *nodeinfo[0];
184}; 184};
185 185
186/* 186/*
@@ -284,17 +284,6 @@ struct mem_cgroup {
284 */ 284 */
285 struct res_counter kmem; 285 struct res_counter kmem;
286 /* 286 /*
287 * Per cgroup active and inactive list, similar to the
288 * per zone LRU lists.
289 */
290 struct mem_cgroup_lru_info info;
291 int last_scanned_node;
292#if MAX_NUMNODES > 1
293 nodemask_t scan_nodes;
294 atomic_t numainfo_events;
295 atomic_t numainfo_updating;
296#endif
297 /*
298 * Should the accounting and control be hierarchical, per subtree? 287 * Should the accounting and control be hierarchical, per subtree?
299 */ 288 */
300 bool use_hierarchy; 289 bool use_hierarchy;
@@ -357,8 +346,29 @@ struct mem_cgroup {
357 /* Index in the kmem_cache->memcg_params->memcg_caches array */ 346 /* Index in the kmem_cache->memcg_params->memcg_caches array */
358 int kmemcg_id; 347 int kmemcg_id;
359#endif 348#endif
349
350 int last_scanned_node;
351#if MAX_NUMNODES > 1
352 nodemask_t scan_nodes;
353 atomic_t numainfo_events;
354 atomic_t numainfo_updating;
355#endif
356 /*
357 * Per cgroup active and inactive list, similar to the
358 * per zone LRU lists.
359 *
360 * WARNING: This has to be the last element of the struct. Don't
361 * add new fields after this point.
362 */
363 struct mem_cgroup_lru_info info;
360}; 364};
361 365
366static size_t memcg_size(void)
367{
368 return sizeof(struct mem_cgroup) +
369 nr_node_ids * sizeof(struct mem_cgroup_per_node);
370}
371
362/* internal only representation about the status of kmem accounting. */ 372/* internal only representation about the status of kmem accounting. */
363enum { 373enum {
364 KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */ 374 KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */
@@ -635,6 +645,7 @@ static void drain_all_stock_async(struct mem_cgroup *memcg);
635static struct mem_cgroup_per_zone * 645static struct mem_cgroup_per_zone *
636mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid) 646mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid)
637{ 647{
648 VM_BUG_ON((unsigned)nid >= nr_node_ids);
638 return &memcg->info.nodeinfo[nid]->zoneinfo[zid]; 649 return &memcg->info.nodeinfo[nid]->zoneinfo[zid];
639} 650}
640 651
@@ -5925,9 +5936,9 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
5925static struct mem_cgroup *mem_cgroup_alloc(void) 5936static struct mem_cgroup *mem_cgroup_alloc(void)
5926{ 5937{
5927 struct mem_cgroup *memcg; 5938 struct mem_cgroup *memcg;
5928 int size = sizeof(struct mem_cgroup); 5939 size_t size = memcg_size();
5929 5940
5930 /* Can be very big if MAX_NUMNODES is very big */ 5941 /* Can be very big if nr_node_ids is very big */
5931 if (size < PAGE_SIZE) 5942 if (size < PAGE_SIZE)
5932 memcg = kzalloc(size, GFP_KERNEL); 5943 memcg = kzalloc(size, GFP_KERNEL);
5933 else 5944 else
@@ -5964,7 +5975,7 @@ out_free:
5964static void __mem_cgroup_free(struct mem_cgroup *memcg) 5975static void __mem_cgroup_free(struct mem_cgroup *memcg)
5965{ 5976{
5966 int node; 5977 int node;
5967 int size = sizeof(struct mem_cgroup); 5978 size_t size = memcg_size();
5968 5979
5969 mem_cgroup_remove_from_trees(memcg); 5980 mem_cgroup_remove_from_trees(memcg);
5970 free_css_id(&mem_cgroup_subsys, &memcg->css); 5981 free_css_id(&mem_cgroup_subsys, &memcg->css);