Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--   mm/memcontrol.c   163
1 file changed, 88 insertions, 75 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 665dcd7abfff..e3cd40b2d5d9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -59,6 +59,7 @@
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/tcp_memcontrol.h>
+#include "slab.h"
 
 #include <asm/uaccess.h>
 
@@ -499,6 +500,29 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 	return (memcg == root_mem_cgroup);
 }
 
+/*
+ * We restrict the id in the range of [1, 65535], so it can fit into
+ * an unsigned short.
+ */
+#define MEM_CGROUP_ID_MAX	USHRT_MAX
+
+static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
+{
+	/*
+	 * The ID of the root cgroup is 0, but memcg treat 0 as an
+	 * invalid ID, so we return (cgroup_id + 1).
+	 */
+	return memcg->css.cgroup->id + 1;
+}
+
+static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+{
+	struct cgroup_subsys_state *css;
+
+	css = css_from_id(id - 1, &mem_cgroup_subsys);
+	return mem_cgroup_from_css(css);
+}
+
 /* Writing them here to avoid exposing memcg's inner layout */
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
 
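
[Aside: the id scheme added by the hunk above, memcg ids are the cgroup id shifted up by one so that 0 can keep meaning "no cgroup", can be modelled in a few lines of throwaway userspace C. The table and helper names below are illustrative stand-ins for css_from_id() and friends, not the kernel API.]

	/* Userspace sketch only; not kernel code. */
	#include <assert.h>
	#include <stdio.h>

	#define MEM_CGROUP_ID_MAX 65535	/* USHRT_MAX: must fit an unsigned short */

	struct fake_cgroup { int id; const char *name; };

	static struct fake_cgroup cgroups[] = {
		{ 0, "root" },	/* cgroup id 0 -> memcg id 1 */
		{ 1, "A" },	/* cgroup id 1 -> memcg id 2 */
	};

	static unsigned short to_memcg_id(const struct fake_cgroup *cg)
	{
		return (unsigned short)(cg->id + 1);	/* 0 stays reserved as "invalid" */
	}

	static struct fake_cgroup *from_memcg_id(unsigned short id)
	{
		if (!id)	/* id 0 means no cgroup was recorded */
			return NULL;
		return &cgroups[id - 1];
	}

	int main(void)
	{
		unsigned short id = to_memcg_id(&cgroups[1]);

		assert(from_memcg_id(id) == &cgroups[1]);
		assert(from_memcg_id(0) == NULL);
		printf("memcg id of %s = %u\n", cgroups[1].name, id);
		return 0;
	}
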
@@ -570,16 +594,11 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
- * There are two main reasons for not using the css_id for this:
- * 1) this works better in sparse environments, where we have a lot of memcgs,
- *    but only a few kmem-limited. Or also, if we have, for instance, 200
- *    memcgs, and none but the 200th is kmem-limited, we'd have to have a
- *    200 entry array for that.
- *
- * 2) In order not to violate the cgroup API, we would like to do all memory
- *    allocation in ->create(). At that point, we haven't yet allocated the
- *    css_id. Having a separate index prevents us from messing with the cgroup
- *    core for this
+ * The main reason for not using cgroup id for this:
+ *  this works better in sparse environments, where we have a lot of memcgs,
+ *  but only a few kmem-limited. Or also, if we have, for instance, 200
+ *  memcgs, and none but the 200th is kmem-limited, we'd have to have a
+ *  200 entry array for that.
  *
  * The current size of the caches array is stored in
  * memcg_limited_groups_array_size. It will double each time we have to
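
[Aside: the sparse-environment argument in the comment above is easy to see with a small userspace model. Only kmem-limited memcgs are handed a dense index, so the per-cache arrays stay small; indexing by cgroup id instead would size them by the largest id in use. All names and numbers below are made up for illustration.]

	/* Userspace sketch only; not kernel code. */
	#include <stdio.h>

	#define NR_MEMCGS 200

	int main(void)
	{
		int next_cache_idx = 0;		/* stand-in for what memcg_cache_id() hands out */
		int cache_idx[NR_MEMCGS];
		int cgroup_id;

		for (cgroup_id = 0; cgroup_id < NR_MEMCGS; cgroup_id++)
			cache_idx[cgroup_id] = -1;	/* not kmem-limited */

		/* Only the 200th memcg (cgroup id 199) gets a kmem limit. */
		cache_idx[199] = next_cache_idx++;

		printf("array size if indexed by cgroup id: %d\n", 199 + 1);
		printf("array size if indexed by dense cache idx: %d\n", next_cache_idx);
		return 0;
	}
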
@@ -594,14 +613,14 @@ int memcg_limited_groups_array_size;
  * cgroups is a reasonable guess. In the future, it could be a parameter or
  * tunable, but that is strictly not necessary.
  *
- * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
+ * MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
  * this constant directly from cgroup, but it is understandable that this is
  * better kept as an internal representation in cgroup.c. In any case, the
- * css_id space is not getting any smaller, and we don't have to necessarily
+ * cgrp_id space is not getting any smaller, and we don't have to necessarily
  * increase ours as well if it increases.
  */
 #define MEMCG_CACHES_MIN_SIZE	4
-#define MEMCG_CACHES_MAX_SIZE	65535
+#define MEMCG_CACHES_MAX_SIZE	MEM_CGROUP_ID_MAX
 
 /*
  * A lot of the calls to the cache allocation functions are expected to be
@@ -1408,7 +1427,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
 		return true;
 	if (!root_memcg->use_hierarchy || !memcg)
 		return false;
-	return css_is_ancestor(&memcg->css, &root_memcg->css);
+	return cgroup_is_descendant(memcg->css.cgroup, root_memcg->css.cgroup);
 }
 
 static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
@@ -2826,15 +2845,10 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
  */
 static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 {
-	struct cgroup_subsys_state *css;
-
 	/* ID 0 is unused ID */
 	if (!id)
 		return NULL;
-	css = css_lookup(&mem_cgroup_subsys, id);
-	if (!css)
-		return NULL;
-	return mem_cgroup_from_css(css);
+	return mem_cgroup_from_id(id);
 }
 
 struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@@ -2955,7 +2969,7 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
 
 	VM_BUG_ON(p->is_root_cache);
 	cachep = p->root_cache;
-	return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
+	return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg));
 }
 
 #ifdef CONFIG_SLABINFO
@@ -2984,21 +2998,14 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 	struct res_counter *fail_res;
 	struct mem_cgroup *_memcg;
 	int ret = 0;
-	bool may_oom;
 
 	ret = res_counter_charge(&memcg->kmem, size, &fail_res);
 	if (ret)
 		return ret;
 
-	/*
-	 * Conditions under which we can wait for the oom_killer. Those are
-	 * the same conditions tested by the core page allocator
-	 */
-	may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
-
 	_memcg = memcg;
 	ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
-				      &_memcg, may_oom);
+				      &_memcg, oom_gfp_allowed(gfp));
 
 	if (ret == -EINTR) {
 		/*
@@ -3138,7 +3145,7 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 {
 	struct memcg_cache_params *cur_params = s->memcg_params;
 
-	VM_BUG_ON(s->memcg_params && !s->memcg_params->is_root_cache);
+	VM_BUG_ON(!is_root_cache(s));
 
 	if (num_groups > memcg_limited_groups_array_size) {
 		int i;
@@ -3399,7 +3406,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 	idx = memcg_cache_id(memcg);
 
 	mutex_lock(&memcg_cache_mutex);
-	new_cachep = cachep->memcg_params->memcg_caches[idx];
+	new_cachep = cache_from_memcg_idx(cachep, idx);
 	if (new_cachep) {
 		css_put(&memcg->css);
 		goto out;
@@ -3445,8 +3452,8 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 	 * we'll take the set_limit_mutex to protect ourselves against this.
 	 */
 	mutex_lock(&set_limit_mutex);
-	for (i = 0; i < memcg_limited_groups_array_size; i++) {
-		c = s->memcg_params->memcg_caches[i];
+	for_each_memcg_cache_index(i) {
+		c = cache_from_memcg_idx(s, i);
 		if (!c)
 			continue;
 
@@ -3579,8 +3586,8 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	 * code updating memcg_caches will issue a write barrier to match this.
 	 */
 	read_barrier_depends();
-	if (likely(cachep->memcg_params->memcg_caches[idx])) {
-		cachep = cachep->memcg_params->memcg_caches[idx];
+	if (likely(cache_from_memcg_idx(cachep, idx))) {
+		cachep = cache_from_memcg_idx(cachep, idx);
 		goto out;
 	}
 
@@ -4350,7 +4357,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 	 * css_get() was called in uncharge().
 	 */
 	if (do_swap_account && swapout && memcg)
-		swap_cgroup_record(ent, css_id(&memcg->css));
+		swap_cgroup_record(ent, mem_cgroup_id(memcg));
 }
 #endif
 
@@ -4402,8 +4409,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
 {
 	unsigned short old_id, new_id;
 
-	old_id = css_id(&from->css);
-	new_id = css_id(&to->css);
+	old_id = mem_cgroup_id(from);
+	new_id = mem_cgroup_id(to);
 
 	if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
 		mem_cgroup_swap_statistics(from, false);
@@ -5376,45 +5383,50 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
 static int memcg_numa_stat_show(struct cgroup_subsys_state *css,
 				struct cftype *cft, struct seq_file *m)
 {
+	struct numa_stat {
+		const char *name;
+		unsigned int lru_mask;
+	};
+
+	static const struct numa_stat stats[] = {
+		{ "total", LRU_ALL },
+		{ "file", LRU_ALL_FILE },
+		{ "anon", LRU_ALL_ANON },
+		{ "unevictable", BIT(LRU_UNEVICTABLE) },
+	};
+	const struct numa_stat *stat;
 	int nid;
-	unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
-	unsigned long node_nr;
+	unsigned long nr;
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
-	total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
-	seq_printf(m, "total=%lu", total_nr);
-	for_each_node_state(nid, N_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL);
-		seq_printf(m, " N%d=%lu", nid, node_nr);
-	}
-	seq_putc(m, '\n');
-
-	file_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_FILE);
-	seq_printf(m, "file=%lu", file_nr);
-	for_each_node_state(nid, N_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
-				LRU_ALL_FILE);
-		seq_printf(m, " N%d=%lu", nid, node_nr);
-	}
-	seq_putc(m, '\n');
-
-	anon_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_ANON);
-	seq_printf(m, "anon=%lu", anon_nr);
-	for_each_node_state(nid, N_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
-				LRU_ALL_ANON);
-		seq_printf(m, " N%d=%lu", nid, node_nr);
+	for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
+		nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask);
+		seq_printf(m, "%s=%lu", stat->name, nr);
+		for_each_node_state(nid, N_MEMORY) {
+			nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
+							  stat->lru_mask);
+			seq_printf(m, " N%d=%lu", nid, nr);
+		}
+		seq_putc(m, '\n');
+	}
+
+	for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
+		struct mem_cgroup *iter;
+
+		nr = 0;
+		for_each_mem_cgroup_tree(iter, memcg)
+			nr += mem_cgroup_nr_lru_pages(iter, stat->lru_mask);
+		seq_printf(m, "hierarchical_%s=%lu", stat->name, nr);
+		for_each_node_state(nid, N_MEMORY) {
+			nr = 0;
+			for_each_mem_cgroup_tree(iter, memcg)
+				nr += mem_cgroup_node_nr_lru_pages(
+					iter, nid, stat->lru_mask);
+			seq_printf(m, " N%d=%lu", nid, nr);
+		}
+		seq_putc(m, '\n');
 	}
-	seq_putc(m, '\n');
 
-	unevictable_nr = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
-	seq_printf(m, "unevictable=%lu", unevictable_nr);
-	for_each_node_state(nid, N_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
-				BIT(LRU_UNEVICTABLE));
-		seq_printf(m, " N%d=%lu", nid, node_nr);
-	}
-	seq_putc(m, '\n');
 	return 0;
 }
 #endif /* CONFIG_NUMA */
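
[Aside: the rewrite above replaces four near-identical print blocks with a table-driven loop, and then reuses the same table for the new hierarchical_* lines. The shape of that loop can be tried out in userspace; the page counts below are fabricated, only the { name, mask } table and the output format mirror the patch.]

	/* Userspace sketch only; not kernel code. */
	#include <stdio.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
	#define NR_FAKE_NODES 2

	struct numa_stat { const char *name; unsigned int lru_mask; };

	static unsigned long fake_node_pages(unsigned int mask, int nid)
	{
		return (unsigned long)(mask & 0xff) * 10 + nid;	/* arbitrary numbers */
	}

	int main(void)
	{
		static const struct numa_stat stats[] = {
			{ "total", 0xf }, { "file", 0x3 }, { "anon", 0xc },
		};
		const struct numa_stat *stat;
		int nid;

		for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
			unsigned long nr = 0;

			for (nid = 0; nid < NR_FAKE_NODES; nid++)
				nr += fake_node_pages(stat->lru_mask, nid);
			printf("%s=%lu", stat->name, nr);
			for (nid = 0; nid < NR_FAKE_NODES; nid++)
				printf(" N%d=%lu", nid,
				       fake_node_pages(stat->lru_mask, nid));
			putchar('\n');
		}
		return 0;
	}
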
@@ -6166,7 +6178,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 	size_t size = memcg_size();
 
 	mem_cgroup_remove_from_trees(memcg);
-	free_css_id(&mem_cgroup_subsys, &memcg->css);
 
 	for_each_node(node)
 		free_mem_cgroup_per_zone_info(memcg, node);
@@ -6269,6 +6280,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
 	int error = 0;
 
+	if (css->cgroup->id > MEM_CGROUP_ID_MAX)
+		return -ENOSPC;
+
 	if (!parent)
 		return 0;
 
@@ -6540,7 +6554,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
 	}
 	/* There is a swap entry and a page doesn't exist or isn't charged */
 	if (ent.val && !ret &&
-	    css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) {
+	    mem_cgroup_id(mc.from) == lookup_swap_cgroup_id(ent)) {
 		ret = MC_TARGET_SWAP;
 		if (target)
 			target->ent = ent;
@@ -6960,7 +6974,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
 	.bind = mem_cgroup_bind,
 	.base_cftypes = mem_cgroup_files,
 	.early_init = 0,
-	.use_id = 1,
 };
 
 #ifdef CONFIG_MEMCG_SWAP