diff options
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 82 |
1 files changed, 75 insertions, 7 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ac8664db3823..5339c89dff63 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -4057,6 +4057,60 @@ static struct cftype mem_cgroup_legacy_files[] = { | |||
4057 | { }, /* terminate */ | 4057 | { }, /* terminate */ |
4058 | }; | 4058 | }; |
4059 | 4059 | ||
4060 | /* | ||
4061 | * Private memory cgroup IDR | ||
4062 | * | ||
4063 | * Swap-out records and page cache shadow entries need to store memcg | ||
4064 | * references in constrained space, so we maintain an ID space that is | ||
4065 | * limited to 16 bit (MEM_CGROUP_ID_MAX), limiting the total number of | ||
4066 | * memory-controlled cgroups to 64k. | ||
4067 | * | ||
4068 | * However, there usually are many references to the offline CSS after | ||
4069 | * the cgroup has been destroyed, such as page cache or reclaimable | ||
4070 | * slab objects, that don't need to hang on to the ID. We want to keep | ||
4071 | * those dead CSS from occupying IDs, or we might quickly exhaust the | ||
4072 | * relatively small ID space and prevent the creation of new cgroups | ||
4073 | * even when there are much fewer than 64k cgroups - possibly none. | ||
4074 | * | ||
4075 | * Maintain a private 16-bit ID space for memcg, and allow the ID to | ||
4076 | * be freed and recycled when it's no longer needed, which is usually | ||
4077 | * when the CSS is offlined. | ||
4078 | * | ||
4079 | * The only exceptions to that are records of swapped out tmpfs/shmem | ||
4080 | * pages that need to be attributed to live ancestors on swapin. But | ||
4081 | * those references are manageable from userspace. | ||
4082 | */ | ||
4083 | |||
4084 | static DEFINE_IDR(mem_cgroup_idr); | ||
4085 | |||
4086 | static void mem_cgroup_id_get(struct mem_cgroup *memcg) | ||
4087 | { | ||
4088 | atomic_inc(&memcg->id.ref); | ||
4089 | } | ||
4090 | |||
4091 | static void mem_cgroup_id_put(struct mem_cgroup *memcg) | ||
4092 | { | ||
4093 | if (atomic_dec_and_test(&memcg->id.ref)) { | ||
4094 | idr_remove(&mem_cgroup_idr, memcg->id.id); | ||
4095 | memcg->id.id = 0; | ||
4096 | |||
4097 | /* Memcg ID pins CSS */ | ||
4098 | css_put(&memcg->css); | ||
4099 | } | ||
4100 | } | ||
4101 | |||
4102 | /** | ||
4103 | * mem_cgroup_from_id - look up a memcg from a memcg id | ||
4104 | * @id: the memcg id to look up | ||
4105 | * | ||
4106 | * Caller must hold rcu_read_lock(). | ||
4107 | */ | ||
4108 | struct mem_cgroup *mem_cgroup_from_id(unsigned short id) | ||
4109 | { | ||
4110 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
4111 | return idr_find(&mem_cgroup_idr, id); | ||
4112 | } | ||
4113 | |||
4060 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) | 4114 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) |
4061 | { | 4115 | { |
4062 | struct mem_cgroup_per_node *pn; | 4116 | struct mem_cgroup_per_node *pn; |
@@ -4116,6 +4170,12 @@ static struct mem_cgroup *mem_cgroup_alloc(void) | |||
4116 | if (!memcg) | 4170 | if (!memcg) |
4117 | return NULL; | 4171 | return NULL; |
4118 | 4172 | ||
4173 | memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL, | ||
4174 | 1, MEM_CGROUP_ID_MAX, | ||
4175 | GFP_KERNEL); | ||
4176 | if (memcg->id.id < 0) | ||
4177 | goto fail; | ||
4178 | |||
4119 | memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu); | 4179 | memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu); |
4120 | if (!memcg->stat) | 4180 | if (!memcg->stat) |
4121 | goto fail; | 4181 | goto fail; |
@@ -4142,8 +4202,11 @@ static struct mem_cgroup *mem_cgroup_alloc(void) | |||
4142 | #ifdef CONFIG_CGROUP_WRITEBACK | 4202 | #ifdef CONFIG_CGROUP_WRITEBACK |
4143 | INIT_LIST_HEAD(&memcg->cgwb_list); | 4203 | INIT_LIST_HEAD(&memcg->cgwb_list); |
4144 | #endif | 4204 | #endif |
4205 | idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); | ||
4145 | return memcg; | 4206 | return memcg; |
4146 | fail: | 4207 | fail: |
4208 | if (memcg->id.id > 0) | ||
4209 | idr_remove(&mem_cgroup_idr, memcg->id.id); | ||
4147 | mem_cgroup_free(memcg); | 4210 | mem_cgroup_free(memcg); |
4148 | return NULL; | 4211 | return NULL; |
4149 | } | 4212 | } |
@@ -4206,12 +4269,11 @@ fail: | |||
4206 | return ERR_PTR(-ENOMEM); | 4269 | return ERR_PTR(-ENOMEM); |
4207 | } | 4270 | } |
4208 | 4271 | ||
4209 | static int | 4272 | static int mem_cgroup_css_online(struct cgroup_subsys_state *css) |
4210 | mem_cgroup_css_online(struct cgroup_subsys_state *css) | ||
4211 | { | 4273 | { |
4212 | if (css->id > MEM_CGROUP_ID_MAX) | 4274 | /* Online state pins memcg ID, memcg ID pins CSS */ |
4213 | return -ENOSPC; | 4275 | mem_cgroup_id_get(mem_cgroup_from_css(css)); |
4214 | 4276 | css_get(css); | |
4215 | return 0; | 4277 | return 0; |
4216 | } | 4278 | } |
4217 | 4279 | ||
@@ -4234,6 +4296,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) | |||
4234 | 4296 | ||
4235 | memcg_offline_kmem(memcg); | 4297 | memcg_offline_kmem(memcg); |
4236 | wb_memcg_offline(memcg); | 4298 | wb_memcg_offline(memcg); |
4299 | |||
4300 | mem_cgroup_id_put(memcg); | ||
4237 | } | 4301 | } |
4238 | 4302 | ||
4239 | static void mem_cgroup_css_released(struct cgroup_subsys_state *css) | 4303 | static void mem_cgroup_css_released(struct cgroup_subsys_state *css) |
@@ -5756,6 +5820,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | |||
5756 | if (!memcg) | 5820 | if (!memcg) |
5757 | return; | 5821 | return; |
5758 | 5822 | ||
5823 | mem_cgroup_id_get(memcg); | ||
5759 | oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); | 5824 | oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); |
5760 | VM_BUG_ON_PAGE(oldid, page); | 5825 | VM_BUG_ON_PAGE(oldid, page); |
5761 | mem_cgroup_swap_statistics(memcg, true); | 5826 | mem_cgroup_swap_statistics(memcg, true); |
@@ -5774,6 +5839,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | |||
5774 | VM_BUG_ON(!irqs_disabled()); | 5839 | VM_BUG_ON(!irqs_disabled()); |
5775 | mem_cgroup_charge_statistics(memcg, page, false, -1); | 5840 | mem_cgroup_charge_statistics(memcg, page, false, -1); |
5776 | memcg_check_events(memcg, page); | 5841 | memcg_check_events(memcg, page); |
5842 | |||
5843 | if (!mem_cgroup_is_root(memcg)) | ||
5844 | css_put(&memcg->css); | ||
5777 | } | 5845 | } |
5778 | 5846 | ||
5779 | /* | 5847 | /* |
@@ -5804,11 +5872,11 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) | |||
5804 | !page_counter_try_charge(&memcg->swap, 1, &counter)) | 5872 | !page_counter_try_charge(&memcg->swap, 1, &counter)) |
5805 | return -ENOMEM; | 5873 | return -ENOMEM; |
5806 | 5874 | ||
5875 | mem_cgroup_id_get(memcg); | ||
5807 | oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); | 5876 | oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); |
5808 | VM_BUG_ON_PAGE(oldid, page); | 5877 | VM_BUG_ON_PAGE(oldid, page); |
5809 | mem_cgroup_swap_statistics(memcg, true); | 5878 | mem_cgroup_swap_statistics(memcg, true); |
5810 | 5879 | ||
5811 | css_get(&memcg->css); | ||
5812 | return 0; | 5880 | return 0; |
5813 | } | 5881 | } |
5814 | 5882 | ||
@@ -5837,7 +5905,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry) | |||
5837 | page_counter_uncharge(&memcg->memsw, 1); | 5905 | page_counter_uncharge(&memcg->memsw, 1); |
5838 | } | 5906 | } |
5839 | mem_cgroup_swap_statistics(memcg, false); | 5907 | mem_cgroup_swap_statistics(memcg, false); |
5840 | css_put(&memcg->css); | 5908 | mem_cgroup_id_put(memcg); |
5841 | } | 5909 | } |
5842 | rcu_read_unlock(); | 5910 | rcu_read_unlock(); |
5843 | } | 5911 | } |