author		Johannes Weiner <hannes@cmpxchg.org>	2013-09-12 18:13:43 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-12 18:38:01 -0400
commit		fb2a6fc56be66c169f8b80e07ed999ba453a2db2 (patch)
tree		ed5268be88df1d149802eb03e831d46c619cb8cc /mm
parent		519e52473ebe9db5cdef44670d5a97f1fd53d721 (diff)
mm: memcg: rework and document OOM waiting and wakeup
The memcg OOM handler open-codes a sleeping lock for OOM serialization
(trylock, wait, repeat) because the required locking is so specific to
memcg hierarchies.  However, it would be nice if this construct would be
clearly recognizable and not be as obfuscated as it is right now.  Clean
up as follows:

1. Remove the return value of mem_cgroup_oom_unlock()

2. Rename mem_cgroup_oom_lock() to mem_cgroup_oom_trylock().

3. Pull the prepare_to_wait() out of the memcg_oom_lock scope.  This
   makes it more obvious that the task has to be on the waitqueue
   before attempting to OOM-trylock the hierarchy, to not miss any
   wakeups before going to sleep.  It just didn't matter until now
   because it was all lumped together into the global memcg_oom_lock
   spinlock section.

4. Pull the mem_cgroup_oom_notify() out of the memcg_oom_lock scope.
   It is protected by the hierarchical OOM-lock.

5. The memcg_oom_lock spinlock is only required to propagate the OOM
   lock in any given hierarchy atomically.  Restrict its scope to
   mem_cgroup_oom_(trylock|unlock).

6. Do not wake up the waitqueue unconditionally at the end of the
   function.  Only the lockholder has to wake up the next in line
   after releasing the lock.

   Note that the lockholder kicks off the OOM-killer, which in turn
   leads to wakeups from the uncharges of the exiting task.  But a
   contender is not guaranteed to see them if it enters the OOM path
   after the OOM kills but before the lockholder releases the lock.
   Thus there has to be an explicit wakeup after releasing the lock.

7. Put the OOM task on the waitqueue before marking the hierarchy as
   under OOM as that is the point where we start to receive wakeups.
   No point in listening before being on the waitqueue.

8. Likewise, unmark the hierarchy before finishing the sleep, for
   symmetry.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
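For readers who want to see the ordering from points 3, 6 and 7 in isolation, here is a
minimal userspace sketch of the same "listen before trylock, explicit wakeup after unlock"
pattern, written against pthreads rather than the kernel's waitqueue API.  All names in it
(oom_locked, oom_handle, waitq, ...) are illustrative only and do not exist in memcontrol.c;
it is a sketch of the pattern, not the kernel implementation.

/*
 * Minimal sketch of the serialization pattern documented above, using
 * pthreads instead of prepare_to_wait()/schedule().  A contender
 * registers as a listener (takes the waitqueue mutex) before the
 * trylock, so the holder's wakeup cannot slip into the window between
 * a failed trylock and the sleep.  Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t waitq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  waitq      = PTHREAD_COND_INITIALIZER;
static bool oom_locked;		/* the open-coded sleeping lock */

static void oom_handle(long id)
{
	bool locked;

	/* "prepare_to_wait()" analogue: start listening before the trylock. */
	pthread_mutex_lock(&waitq_lock);

	/* "trylock" analogue: succeed only if nobody holds the lock. */
	locked = !oom_locked;
	if (locked)
		oom_locked = true;

	if (locked) {
		/* Lock holder: do the work (the OOM kill in memcg). */
		pthread_mutex_unlock(&waitq_lock);
		printf("thread %ld: handling OOM\n", id);

		/* Release the lock, then wake the next in line explicitly. */
		pthread_mutex_lock(&waitq_lock);
		oom_locked = false;
		pthread_cond_broadcast(&waitq);
		pthread_mutex_unlock(&waitq_lock);
	} else {
		/* Contender: sleep until the holder releases the lock. */
		while (oom_locked)
			pthread_cond_wait(&waitq, &waitq_lock);
		pthread_mutex_unlock(&waitq_lock);
		printf("thread %ld: woken up, would retry the charge\n", id);
	}
}

static void *thread_fn(void *arg)
{
	oom_handle((long)arg);
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	long i;

	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, thread_fn, (void *)i);
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}

The pthread mutex plays the role that sitting on the waitqueue plays in the kernel version:
because the contender is already listening when it discovers the lock is taken, the holder's
wakeup cannot be lost, which is exactly the race that motivates doing prepare_to_wait()
before the trylock in the patch below.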
Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c	83
1 file changed, 46 insertions(+), 37 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0980bbf6438d..04250cbf46c6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1916,15 +1916,18 @@ mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 	return SKIP;
 }
 
+static DEFINE_SPINLOCK(memcg_oom_lock);
+
 /*
  * Check OOM-Killer is already running under our hierarchy.
  * If someone is running, return false.
- * Has to be called with memcg_oom_lock
  */
-static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
+static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *iter, *failed = NULL;
 
+	spin_lock(&memcg_oom_lock);
+
 	for_each_mem_cgroup_tree(iter, memcg) {
 		if (iter->oom_lock) {
 			/*
@@ -1938,33 +1941,33 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
 			iter->oom_lock = true;
 	}
 
-	if (!failed)
-		return true;
-
-	/*
-	 * OK, we failed to lock the whole subtree so we have to clean up
-	 * what we set up to the failing subtree
-	 */
-	for_each_mem_cgroup_tree(iter, memcg) {
-		if (iter == failed) {
-			mem_cgroup_iter_break(memcg, iter);
-			break;
+	if (failed) {
+		/*
+		 * OK, we failed to lock the whole subtree so we have
+		 * to clean up what we set up to the failing subtree
+		 */
+		for_each_mem_cgroup_tree(iter, memcg) {
+			if (iter == failed) {
+				mem_cgroup_iter_break(memcg, iter);
+				break;
+			}
+			iter->oom_lock = false;
 		}
-		iter->oom_lock = false;
 	}
-	return false;
+
+	spin_unlock(&memcg_oom_lock);
+
+	return !failed;
 }
 
-/*
- * Has to be called with memcg_oom_lock
- */
-static int mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
+static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *iter;
 
+	spin_lock(&memcg_oom_lock);
 	for_each_mem_cgroup_tree(iter, memcg)
 		iter->oom_lock = false;
-	return 0;
+	spin_unlock(&memcg_oom_lock);
 }
 
 static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg)
@@ -1988,7 +1991,6 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
 		atomic_add_unless(&iter->under_oom, -1, 0);
 }
 
-static DEFINE_SPINLOCK(memcg_oom_lock);
 static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
 
 struct oom_wait_info {
@@ -2035,45 +2037,52 @@ static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask,
 				  int order)
 {
 	struct oom_wait_info owait;
-	bool locked, need_to_kill;
+	bool locked;
 
 	owait.memcg = memcg;
 	owait.wait.flags = 0;
 	owait.wait.func = memcg_oom_wake_function;
 	owait.wait.private = current;
 	INIT_LIST_HEAD(&owait.wait.task_list);
-	need_to_kill = true;
-	mem_cgroup_mark_under_oom(memcg);
 
-	/* At first, try to OOM lock hierarchy under memcg.*/
-	spin_lock(&memcg_oom_lock);
-	locked = mem_cgroup_oom_lock(memcg);
 	/*
+	 * As with any blocking lock, a contender needs to start
+	 * listening for wakeups before attempting the trylock,
+	 * otherwise it can miss the wakeup from the unlock and sleep
+	 * indefinitely.  This is just open-coded because our locking
+	 * is so particular to memcg hierarchies.
+	 *
 	 * Even if signal_pending(), we can't quit charge() loop without
 	 * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL
 	 * under OOM is always welcomed, use TASK_KILLABLE here.
 	 */
 	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
-	if (!locked || memcg->oom_kill_disable)
-		need_to_kill = false;
+	mem_cgroup_mark_under_oom(memcg);
+
+	locked = mem_cgroup_oom_trylock(memcg);
+
 	if (locked)
 		mem_cgroup_oom_notify(memcg);
-	spin_unlock(&memcg_oom_lock);
 
-	if (need_to_kill) {
+	if (locked && !memcg->oom_kill_disable) {
+		mem_cgroup_unmark_under_oom(memcg);
 		finish_wait(&memcg_oom_waitq, &owait.wait);
 		mem_cgroup_out_of_memory(memcg, mask, order);
 	} else {
 		schedule();
+		mem_cgroup_unmark_under_oom(memcg);
 		finish_wait(&memcg_oom_waitq, &owait.wait);
 	}
-	spin_lock(&memcg_oom_lock);
-	if (locked)
-		mem_cgroup_oom_unlock(memcg);
-	memcg_wakeup_oom(memcg);
-	spin_unlock(&memcg_oom_lock);
 
-	mem_cgroup_unmark_under_oom(memcg);
+	if (locked) {
+		mem_cgroup_oom_unlock(memcg);
+		/*
+		 * There is no guarantee that an OOM-lock contender
+		 * sees the wakeups triggered by the OOM kill
+		 * uncharges.  Wake any sleepers explicitely.
+		 */
+		memcg_oom_recover(memcg);
+	}
 
 	if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
 		return false;