Diffstat (limited to 'mm/memcontrol.c')
 mm/memcontrol.c | 143 ++++++++++++++++++-----------------------------
 1 file changed, 55 insertions(+), 88 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1c52ddbc839b..34d3ca9572d6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -866,6 +866,7 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
         unsigned long val = 0;
         int cpu;
 
+        get_online_cpus();
         for_each_online_cpu(cpu)
                 val += per_cpu(memcg->stat->events[idx], cpu);
 #ifdef CONFIG_HOTPLUG_CPU
@@ -873,6 +874,7 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
         val += memcg->nocpu_base.events[idx];
         spin_unlock(&memcg->pcp_counter_lock);
 #endif
+        put_online_cpus();
         return val;
 }
 
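Taken together, these two hunks bracket the whole summation with get_online_cpus()/put_online_cpus(), so a CPU can no longer go offline between the for_each_online_cpu() walk and the CONFIG_HOTPLUG_CPU fold-in of counters left behind by departed CPUs, a window in which events could otherwise be missed. As a rough sketch, the function should now read as follows; note the spin_lock() line and the exact prototype are inferred from the surrounding context, since the hunks do not show them:

static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
                                            enum mem_cgroup_events_index idx)
{
        unsigned long val = 0;
        int cpu;

        get_online_cpus();
        for_each_online_cpu(cpu)
                val += per_cpu(memcg->stat->events[idx], cpu);
#ifdef CONFIG_HOTPLUG_CPU
        /* fold in events of CPUs that have since gone offline */
        spin_lock(&memcg->pcp_counter_lock);
        val += memcg->nocpu_base.events[idx];
        spin_unlock(&memcg->pcp_counter_lock);
#endif
        put_online_cpus();
        return val;
}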
@@ -2159,110 +2161,59 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
         memcg_wakeup_oom(memcg);
 }
 
-/*
- * try to call OOM killer
- */
 static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-        bool locked;
-        int wakeups;
-
         if (!current->memcg_oom.may_oom)
                 return;
-
-        current->memcg_oom.in_memcg_oom = 1;
-
         /*
-         * As with any blocking lock, a contender needs to start
-         * listening for wakeups before attempting the trylock,
-         * otherwise it can miss the wakeup from the unlock and sleep
-         * indefinitely. This is just open-coded because our locking
-         * is so particular to memcg hierarchies.
+         * We are in the middle of the charge context here, so we
+         * don't want to block when potentially sitting on a callstack
+         * that holds all kinds of filesystem and mm locks.
+         *
+         * Also, the caller may handle a failed allocation gracefully
+         * (like optional page cache readahead) and so an OOM killer
+         * invocation might not even be necessary.
+         *
+         * That's why we don't do anything here except remember the
+         * OOM context and then deal with it at the end of the page
+         * fault when the stack is unwound, the locks are released,
+         * and when we know whether the fault was overall successful.
          */
-        wakeups = atomic_read(&memcg->oom_wakeups);
-        mem_cgroup_mark_under_oom(memcg);
-
-        locked = mem_cgroup_oom_trylock(memcg);
-
-        if (locked)
-                mem_cgroup_oom_notify(memcg);
-
-        if (locked && !memcg->oom_kill_disable) {
-                mem_cgroup_unmark_under_oom(memcg);
-                mem_cgroup_out_of_memory(memcg, mask, order);
-                mem_cgroup_oom_unlock(memcg);
-                /*
-                 * There is no guarantee that an OOM-lock contender
-                 * sees the wakeups triggered by the OOM kill
-                 * uncharges. Wake any sleepers explicitely.
-                 */
-                memcg_oom_recover(memcg);
-        } else {
-                /*
-                 * A system call can just return -ENOMEM, but if this
-                 * is a page fault and somebody else is handling the
-                 * OOM already, we need to sleep on the OOM waitqueue
-                 * for this memcg until the situation is resolved.
-                 * Which can take some time because it might be
-                 * handled by a userspace task.
-                 *
-                 * However, this is the charge context, which means
-                 * that we may sit on a large call stack and hold
-                 * various filesystem locks, the mmap_sem etc. and we
-                 * don't want the OOM handler to deadlock on them
-                 * while we sit here and wait. Store the current OOM
-                 * context in the task_struct, then return -ENOMEM.
-                 * At the end of the page fault handler, with the
-                 * stack unwound, pagefault_out_of_memory() will check
-                 * back with us by calling
-                 * mem_cgroup_oom_synchronize(), possibly putting the
-                 * task to sleep.
-                 */
-                current->memcg_oom.oom_locked = locked;
-                current->memcg_oom.wakeups = wakeups;
-                css_get(&memcg->css);
-                current->memcg_oom.wait_on_memcg = memcg;
-        }
+        css_get(&memcg->css);
+        current->memcg_oom.memcg = memcg;
+        current->memcg_oom.gfp_mask = mask;
+        current->memcg_oom.order = order;
 }
 
 /**
  * mem_cgroup_oom_synchronize - complete memcg OOM handling
+ * @handle: actually kill/wait or just clean up the OOM state
  *
- * This has to be called at the end of a page fault if the the memcg
- * OOM handler was enabled and the fault is returning %VM_FAULT_OOM.
+ * This has to be called at the end of a page fault if the memcg OOM
+ * handler was enabled.
  *
- * Memcg supports userspace OOM handling, so failed allocations must
+ * Memcg supports userspace OOM handling where failed allocations must
  * sleep on a waitqueue until the userspace task resolves the
  * situation. Sleeping directly in the charge context with all kinds
  * of locks held is not a good idea, instead we remember an OOM state
  * in the task and mem_cgroup_oom_synchronize() has to be called at
- * the end of the page fault to put the task to sleep and clean up the
- * OOM state.
+ * the end of the page fault to complete the OOM handling.
  *
  * Returns %true if an ongoing memcg OOM situation was detected and
- * finalized, %false otherwise.
+ * completed, %false otherwise.
  */
-bool mem_cgroup_oom_synchronize(void)
+bool mem_cgroup_oom_synchronize(bool handle)
 {
+        struct mem_cgroup *memcg = current->memcg_oom.memcg;
         struct oom_wait_info owait;
-        struct mem_cgroup *memcg;
+        bool locked;
 
         /* OOM is global, do not handle */
-        if (!current->memcg_oom.in_memcg_oom)
-                return false;
-
-        /*
-         * We invoked the OOM killer but there is a chance that a kill
-         * did not free up any charges. Everybody else might already
-         * be sleeping, so restart the fault and keep the rampage
-         * going until some charges are released.
-         */
-        memcg = current->memcg_oom.wait_on_memcg;
         if (!memcg)
-                goto out;
+                return false;
 
-        if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
-                goto out_memcg;
+        if (!handle)
+                goto cleanup;
 
         owait.memcg = memcg;
         owait.wait.flags = 0;
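The rewritten mem_cgroup_oom() above no longer kills or sleeps; it only records the OOM context in the task. The task_struct side is not part of this file, but from the fields referenced in this diff (may_oom, memcg, gfp_mask, order) the per-task state presumably looks something like this sketch; the field set is taken from the accesses above, the comments are guesses:

/* sketch of the implied per-task state, normally embedded in task_struct */
struct memcg_oom_info {
        struct mem_cgroup *memcg;       /* memcg that hit its limit, NULL if none */
        gfp_t gfp_mask;                 /* gfp mask of the failed charge */
        int order;                      /* order of the failed charge */
        unsigned int may_oom:1;         /* is memcg OOM handling enabled? */
} memcg_oom;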
@@ -2271,13 +2222,25 @@ bool mem_cgroup_oom_synchronize(void)
         INIT_LIST_HEAD(&owait.wait.task_list);
 
         prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
-        /* Only sleep if we didn't miss any wakeups since OOM */
-        if (atomic_read(&memcg->oom_wakeups) == current->memcg_oom.wakeups)
+        mem_cgroup_mark_under_oom(memcg);
+
+        locked = mem_cgroup_oom_trylock(memcg);
+
+        if (locked)
+                mem_cgroup_oom_notify(memcg);
+
+        if (locked && !memcg->oom_kill_disable) {
+                mem_cgroup_unmark_under_oom(memcg);
+                finish_wait(&memcg_oom_waitq, &owait.wait);
+                mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask,
+                                         current->memcg_oom.order);
+        } else {
                 schedule();
-        finish_wait(&memcg_oom_waitq, &owait.wait);
-out_memcg:
-        mem_cgroup_unmark_under_oom(memcg);
-        if (current->memcg_oom.oom_locked) {
+                mem_cgroup_unmark_under_oom(memcg);
+                finish_wait(&memcg_oom_waitq, &owait.wait);
+        }
+
+        if (locked) {
                 mem_cgroup_oom_unlock(memcg);
                 /*
                  * There is no guarantee that an OOM-lock contender
@@ -2286,10 +2249,9 @@ out_memcg:
                  */
                 memcg_oom_recover(memcg);
         }
+cleanup:
+        current->memcg_oom.memcg = NULL;
         css_put(&memcg->css);
-        current->memcg_oom.wait_on_memcg = NULL;
-out:
-        current->memcg_oom.in_memcg_oom = 0;
         return true;
 }
 
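The deleted comment already named the caller: pagefault_out_of_memory() checks back once the stack is unwound. A hedged sketch of how the new @handle parameter is meant to be used; the surrounding code is an assumption, only mem_cgroup_oom_synchronize() itself is defined in this diff:

/* end of a fault that returned VM_FAULT_OOM: actually kill or wait */
void pagefault_out_of_memory(void)
{
        if (mem_cgroup_oom_synchronize(true))
                return;         /* memcg OOM handled; skip the global OOM killer */

        /* ... otherwise fall through to the global OOM killer ... */
}

A fault that recovers without VM_FAULT_OOM would instead call mem_cgroup_oom_synchronize(false), which takes the cleanup: path above and merely clears current->memcg_oom.memcg and drops the css reference.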
@@ -2703,6 +2665,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
             || fatal_signal_pending(current)))
                 goto bypass;
 
+        if (unlikely(task_in_memcg_oom(current)))
+                goto bypass;
+
         /*
          * We always charge the cgroup the mm_struct belongs to.
          * The mm_struct's mem_cgroup changes on task migration if the
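task_in_memcg_oom() is not defined in this hunk; given that mem_cgroup_oom_synchronize() treats a non-NULL current->memcg_oom.memcg as "OOM in progress", the predicate is presumably just:

/* sketch, assuming the per-task state outlined earlier */
static inline bool task_in_memcg_oom(struct task_struct *p)
{
        return p->memcg_oom.memcg != NULL;
}

Bypassing the charge to root_mem_cgroup here keeps a task that is already in a memcg OOM situation from re-entering reclaim and the OOM machinery; it can finish unwinding the page fault first.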
@@ -2801,6 +2766,8 @@ done:
         return 0;
 nomem:
         *ptr = NULL;
+        if (gfp_mask & __GFP_NOFAIL)
+                return 0;
         return -ENOMEM;
 bypass:
         *ptr = root_mem_cgroup;
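With this last hunk the failure tail of __mem_cgroup_try_charge() distinguishes three outcomes. Annotated for clarity (the bypass return value lies outside the hunk):

nomem:
        *ptr = NULL;
        if (gfp_mask & __GFP_NOFAIL)
                return 0;       /* caller cannot handle failure: report success,
                                 * the page simply remains uncharged */
        return -ENOMEM;         /* ordinary charge failure */
bypass:
        *ptr = root_mem_cgroup; /* charge is redirected to the root group */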