Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	163
1 file changed, 88 insertions, 75 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 665dcd7abfff..e3cd40b2d5d9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -59,6 +59,7 @@
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/tcp_memcontrol.h>
+#include "slab.h"
 
 #include <asm/uaccess.h>
 
@@ -499,6 +500,29 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 	return (memcg == root_mem_cgroup);
 }
 
+/*
+ * We restrict the id in the range of [1, 65535], so it can fit into
+ * an unsigned short.
+ */
+#define MEM_CGROUP_ID_MAX	USHRT_MAX
+
+static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
+{
+	/*
+	 * The ID of the root cgroup is 0, but memcg treat 0 as an
+	 * invalid ID, so we return (cgroup_id + 1).
+	 */
+	return memcg->css.cgroup->id + 1;
+}
+
+static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+{
+	struct cgroup_subsys_state *css;
+
+	css = css_from_id(id - 1, &mem_cgroup_subsys);
+	return mem_cgroup_from_css(css);
+}
+
 /* Writing them here to avoid exposing memcg's inner layout */
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
 
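These two helpers are the crux of the conversion: the swap records touched further down (swap_cgroup_record() and friends) store a memcg identifier in an unsigned short, and 0 is reserved to mean "no owner", hence the +1/-1 shift around the raw cgroup id. A minimal userspace model of that round-trip, assuming nothing beyond the arithmetic visible in the hunk:

#include <assert.h>
#include <limits.h>

#define MEM_CGROUP_ID_MAX USHRT_MAX

/* mirror of mem_cgroup_id(): cgroup id 0 (the root) maps to 1 */
static unsigned short encode_memcg_id(int cgroup_id)
{
	return (unsigned short)(cgroup_id + 1);
}

/* mirror of mem_cgroup_from_id(): undo the shift; 0 stays "invalid" */
static int decode_memcg_id(unsigned short id)
{
	assert(id != 0);
	return id - 1;
}

int main(void)
{
	assert(encode_memcg_id(0) == 1);           /* root never encodes to 0 */
	assert(decode_memcg_id(encode_memcg_id(42)) == 42);  /* round-trip */
	return 0;
}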
@@ -570,16 +594,11 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
- * There are two main reasons for not using the css_id for this:
- * 1) this works better in sparse environments, where we have a lot of memcgs,
- *    but only a few kmem-limited. Or also, if we have, for instance, 200
- *    memcgs, and none but the 200th is kmem-limited, we'd have to have a
- *    200 entry array for that.
- *
- * 2) In order not to violate the cgroup API, we would like to do all memory
- *    allocation in ->create(). At that point, we haven't yet allocated the
- *    css_id. Having a separate index prevents us from messing with the cgroup
- *    core for this
+ * The main reason for not using cgroup id for this:
+ *	this works better in sparse environments, where we have a lot of memcgs,
+ *	but only a few kmem-limited. Or also, if we have, for instance, 200
+ *	memcgs, and none but the 200th is kmem-limited, we'd have to have a
+ *	200 entry array for that.
  *
  * The current size of the caches array is stored in
  * memcg_limited_groups_array_size. It will double each time we have to
@@ -594,14 +613,14 @@ int memcg_limited_groups_array_size;
  * cgroups is a reasonable guess. In the future, it could be a parameter or
  * tunable, but that is strictly not necessary.
  *
- * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
+ * MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
  * this constant directly from cgroup, but it is understandable that this is
  * better kept as an internal representation in cgroup.c. In any case, the
- * css_id space is not getting any smaller, and we don't have to necessarily
+ * cgrp_id space is not getting any smaller, and we don't have to necessarily
  * increase ours as well if it increases.
  */
 #define MEMCG_CACHES_MIN_SIZE 4
-#define MEMCG_CACHES_MAX_SIZE 65535
+#define MEMCG_CACHES_MAX_SIZE MEM_CGROUP_ID_MAX
 
 /*
  * A lot of the calls to the cache allocation functions are expected to be
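With the cap now spelled MEM_CGROUP_ID_MAX, the per-memcg cache array can never need more slots than there are usable ids. The growth policy the comment describes (start small, grow on demand, clamp at the cap) can be sketched in isolation; the function name below is illustrative, not a kernel symbol:

#include <limits.h>

#define MEMCG_CACHES_MIN_SIZE 4
#define MEMCG_CACHES_MAX_SIZE USHRT_MAX	/* == MEM_CGROUP_ID_MAX */

/* illustrative: smallest doubled size able to hold num_groups entries */
static int caches_array_size(int num_groups)
{
	int size = MEMCG_CACHES_MIN_SIZE;

	while (size < num_groups && size < MEMCG_CACHES_MAX_SIZE)
		size *= 2;
	return size < MEMCG_CACHES_MAX_SIZE ? size : MEMCG_CACHES_MAX_SIZE;
}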
@@ -1408,7 +1427,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
 		return true;
 	if (!root_memcg->use_hierarchy || !memcg)
 		return false;
-	return css_is_ancestor(&memcg->css, &root_memcg->css);
+	return cgroup_is_descendant(memcg->css.cgroup, root_memcg->css.cgroup);
}
 
 static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
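css_is_ancestor(child, root) and cgroup_is_descendant(child, root) answer the same question from opposite ends; only the naming flips, the argument order stays child-first. The caller has already returned true for the memcg == root_memcg case, so self-inclusiveness does not matter here. A toy parent-pointer version of the walk:

struct node { struct node *parent; };

/* walk child towards the root; true if ancestor is on the path */
static int is_descendant(const struct node *n, const struct node *ancestor)
{
	for (; n; n = n->parent)
		if (n == ancestor)
			return 1;
	return 0;
}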
@@ -2826,15 +2845,10 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
  */
 static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 {
-	struct cgroup_subsys_state *css;
-
 	/* ID 0 is unused ID */
 	if (!id)
 		return NULL;
-	css = css_lookup(&mem_cgroup_subsys, id);
-	if (!css)
-		return NULL;
-	return mem_cgroup_from_css(css);
+	return mem_cgroup_from_id(id);
 }
 
 struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@@ -2955,7 +2969,7 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
 
 	VM_BUG_ON(p->is_root_cache);
 	cachep = p->root_cache;
-	return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
+	return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg));
 }
 
 #ifdef CONFIG_SLABINFO
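cache_from_memcg_idx() replaces the repeated open-coded dereference chain here and in the hunks below; it comes from mm/slab.h, the new include in the first hunk. A sketch of its likely shape, inferred from the expression it replaces rather than quoted from the header:

/* sketch -- inferred from the open-coded expression it replaces */
static inline struct kmem_cache *
cache_from_memcg_idx(struct kmem_cache *s, int idx)
{
	if (!s->memcg_params)
		return NULL;
	return s->memcg_params->memcg_caches[idx];
}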
@@ -2984,21 +2998,14 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 	struct res_counter *fail_res;
 	struct mem_cgroup *_memcg;
 	int ret = 0;
-	bool may_oom;
 
 	ret = res_counter_charge(&memcg->kmem, size, &fail_res);
 	if (ret)
 		return ret;
 
-	/*
-	 * Conditions under which we can wait for the oom_killer. Those are
-	 * the same conditions tested by the core page allocator
-	 */
-	may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
-
 	_memcg = memcg;
 	ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
-				      &_memcg, may_oom);
+				      &_memcg, oom_gfp_allowed(gfp));
 
 	if (ret == -EINTR) {
 		/*
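The deleted comment and may_oom computation move behind oom_gfp_allowed(), and the predicate is unchanged: the charge may wait for the OOM killer only for allocations that can block on filesystem reclaim (__GFP_FS) and have not opted out of retrying (__GFP_NORETRY), the same conditions the core page allocator tests. Its body, inferred from the two lines removed above (the helper presumably lives in the oom header in this series):

/* inferred from the removed open-coded test */
static inline bool oom_gfp_allowed(gfp_t gfp_mask)
{
	return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY);
}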
@@ -3138,7 +3145,7 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 {
 	struct memcg_cache_params *cur_params = s->memcg_params;
 
-	VM_BUG_ON(s->memcg_params && !s->memcg_params->is_root_cache);
+	VM_BUG_ON(!is_root_cache(s));
 
 	if (num_groups > memcg_limited_groups_array_size) {
 		int i;
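The two VM_BUG_ON forms are equivalent provided is_root_cache() (also from mm/slab.h) treats a cache with no memcg_params as a root cache; a sketch under that assumption:

/* sketch of the mm/slab.h helper: no memcg_params means root cache */
static inline bool is_root_cache(struct kmem_cache *s)
{
	return !s->memcg_params || s->memcg_params->is_root_cache;
}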
@@ -3399,7 +3406,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 	idx = memcg_cache_id(memcg);
 
 	mutex_lock(&memcg_cache_mutex);
-	new_cachep = cachep->memcg_params->memcg_caches[idx];
+	new_cachep = cache_from_memcg_idx(cachep, idx);
 	if (new_cachep) {
 		css_put(&memcg->css);
 		goto out;
@@ -3445,8 +3452,8 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 	 * we'll take the set_limit_mutex to protect ourselves against this.
 	 */
 	mutex_lock(&set_limit_mutex);
-	for (i = 0; i < memcg_limited_groups_array_size; i++) {
-		c = s->memcg_params->memcg_caches[i];
+	for_each_memcg_cache_index(i) {
+		c = cache_from_memcg_idx(s, i);
 		if (!c)
 			continue;
 
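for_each_memcg_cache_index() packages the exact loop bounds it replaces; a plausible definition, inferred from the removed for statement rather than quoted from mm/slab.h:

/* inferred from the loop it replaces */
#define for_each_memcg_cache_index(_idx)	\
	for ((_idx) = 0; (_idx) < memcg_limited_groups_array_size; (_idx)++)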
@@ -3579,8 +3586,8 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	 * code updating memcg_caches will issue a write barrier to match this.
 	 */
 	read_barrier_depends();
-	if (likely(cachep->memcg_params->memcg_caches[idx])) {
-		cachep = cachep->memcg_params->memcg_caches[idx];
+	if (likely(cache_from_memcg_idx(cachep, idx))) {
+		cachep = cache_from_memcg_idx(cachep, idx);
 		goto out;
 	}
 
@@ -4350,7 +4357,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 	 * css_get() was called in uncharge().
 	 */
 	if (do_swap_account && swapout && memcg)
-		swap_cgroup_record(ent, css_id(&memcg->css));
+		swap_cgroup_record(ent, mem_cgroup_id(memcg));
 }
 #endif
 
@@ -4402,8 +4409,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
 {
 	unsigned short old_id, new_id;
 
-	old_id = css_id(&from->css);
-	new_id = css_id(&to->css);
+	old_id = mem_cgroup_id(from);
+	new_id = mem_cgroup_id(to);
 
 	if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
 		mem_cgroup_swap_statistics(from, false);
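The move succeeds only if the swap record still names the expected old owner, which is what keeps concurrent movers safe. A userspace model of that handoff using C11 atomics (the kernel's swap_cgroup_cmpxchg() gets the same compare-and-swap semantics with its own locking, so treat this purely as a model):

#include <stdatomic.h>
#include <stdbool.h>

/* model: reassign the record from old_id to new_id only if unchanged */
static bool swap_record_move(_Atomic unsigned short *record,
			     unsigned short old_id, unsigned short new_id)
{
	unsigned short expected = old_id;

	return atomic_compare_exchange_strong(record, &expected, new_id);
}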
@@ -5376,45 +5383,50 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
 static int memcg_numa_stat_show(struct cgroup_subsys_state *css,
 				struct cftype *cft, struct seq_file *m)
 {
+	struct numa_stat {
+		const char *name;
+		unsigned int lru_mask;
+	};
+
+	static const struct numa_stat stats[] = {
+		{ "total", LRU_ALL },
+		{ "file", LRU_ALL_FILE },
+		{ "anon", LRU_ALL_ANON },
+		{ "unevictable", BIT(LRU_UNEVICTABLE) },
+	};
+	const struct numa_stat *stat;
 	int nid;
-	unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
-	unsigned long node_nr;
+	unsigned long nr;
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
-	total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
-	seq_printf(m, "total=%lu", total_nr);
-	for_each_node_state(nid, N_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL);
-		seq_printf(m, " N%d=%lu", nid, node_nr);
-	}
-	seq_putc(m, '\n');
-
-	file_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_FILE);
-	seq_printf(m, "file=%lu", file_nr);
-	for_each_node_state(nid, N_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
-				LRU_ALL_FILE);
-		seq_printf(m, " N%d=%lu", nid, node_nr);
-	}
-	seq_putc(m, '\n');
-
-	anon_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_ANON);
-	seq_printf(m, "anon=%lu", anon_nr);
-	for_each_node_state(nid, N_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
-				LRU_ALL_ANON);
-		seq_printf(m, " N%d=%lu", nid, node_nr);
+	for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
+		nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask);
+		seq_printf(m, "%s=%lu", stat->name, nr);
+		for_each_node_state(nid, N_MEMORY) {
+			nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
+							  stat->lru_mask);
+			seq_printf(m, " N%d=%lu", nid, nr);
+		}
+		seq_putc(m, '\n');
+	}
+
+	for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
+		struct mem_cgroup *iter;
+
+		nr = 0;
+		for_each_mem_cgroup_tree(iter, memcg)
+			nr += mem_cgroup_nr_lru_pages(iter, stat->lru_mask);
+		seq_printf(m, "hierarchical_%s=%lu", stat->name, nr);
+		for_each_node_state(nid, N_MEMORY) {
+			nr = 0;
+			for_each_mem_cgroup_tree(iter, memcg)
+				nr += mem_cgroup_node_nr_lru_pages(
+					iter, nid, stat->lru_mask);
+			seq_printf(m, " N%d=%lu", nid, nr);
+		}
+		seq_putc(m, '\n');
 	}
-	seq_putc(m, '\n');
 
-	unevictable_nr = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
-	seq_printf(m, "unevictable=%lu", unevictable_nr);
-	for_each_node_state(nid, N_MEMORY) {
-		node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
-				BIT(LRU_UNEVICTABLE));
-		seq_printf(m, " N%d=%lu", nid, node_nr);
-	}
-	seq_putc(m, '\n');
 	return 0;
 }
 #endif /* CONFIG_NUMA */
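The table-driven rewrite preserves the existing four output lines and then appends a hierarchical_* counterpart for each stat, summed over the memcg's subtree. Sample memory.numa_stat output on a two-node machine (all counts illustrative, format taken from the seq_printf calls above):

total=83 N0=51 N1=32
file=42 N0=30 N1=12
anon=40 N0=20 N1=20
unevictable=1 N0=1 N1=0
hierarchical_total=120 N0=70 N1=50
hierarchical_file=61 N0=41 N1=20
hierarchical_anon=57 N0=28 N1=29
hierarchical_unevictable=2 N0=1 N1=1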
@@ -6166,7 +6178,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 	size_t size = memcg_size();
 
 	mem_cgroup_remove_from_trees(memcg);
-	free_css_id(&mem_cgroup_subsys, &memcg->css);
 
 	for_each_node(node)
 		free_mem_cgroup_per_zone_info(memcg, node);
@@ -6269,6 +6280,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
 	int error = 0;
 
+	if (css->cgroup->id > MEM_CGROUP_ID_MAX)
+		return -ENOSPC;
+
 	if (!parent)
 		return 0;
 
@@ -6540,7 +6554,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
 	}
 	/* There is a swap entry and a page doesn't exist or isn't charged */
 	if (ent.val && !ret &&
-	    css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) {
+	    mem_cgroup_id(mc.from) == lookup_swap_cgroup_id(ent)) {
 		ret = MC_TARGET_SWAP;
 		if (target)
 			target->ent = ent;
@@ -6960,7 +6974,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
 	.bind = mem_cgroup_bind,
 	.base_cftypes = mem_cgroup_files,
 	.early_init = 0,
-	.use_id = 1,
 };
 
 #ifdef CONFIG_MEMCG_SWAP