about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2011-06-15 18:08:45 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-06-15 23:04:01 -0400
commit: 26fe616844491a41a1abc02e29f7a9d1ec2f8ddb (patch)
tree: cd8747ad8e96d6ebda771db41b96db11a4b09f26
parent: 7ae534d074e01e54d5cfbc9734b73fdfc855501f (diff)
memcg: fix percpu cached charge draining frequency
For performance, the memory cgroup caches some "charge" from res_counter in a per-cpu cache. This works well, but because it is a cache, it needs to be flushed in some cases. Typical cases are:

  1. when someone hits the limit.
  2. when rmdir() is called and the charges need to be 0.

But "1" has a problem.

Recently, with large SMP machines, we see many kworker runs because of flushing memcg's cache. The bad thing in the implementation is that the drain code is called even if a cpu's cache holds charge for a memcg that is not related to the memcg which hit the limit.

This patch does the following:

  A) checks whether the percpu cache contains useful data or not.
  B) checks that no other asynchronous percpu draining is running.
  C) doesn't call the local cpu callback.

(*) This patch avoids changing the calling condition with hard-limit.

When I run "cat 1Gfile > /dev/null" under a 300M limit memcg,

[Before]
13767 kamezawa 20 0 98.6m 424 416 D 10.0 0.0 0:00.61 cat
58 root 20 0 0 0 0 S 0.6 0.0 0:00.09 kworker/2:1
60 root 20 0 0 0 0 S 0.6 0.0 0:00.08 kworker/4:1
4 root 20 0 0 0 0 S 0.3 0.0 0:00.02 kworker/0:0
57 root 20 0 0 0 0 S 0.3 0.0 0:00.05 kworker/1:1
61 root 20 0 0 0 0 S 0.3 0.0 0:00.05 kworker/5:1
62 root 20 0 0 0 0 S 0.3 0.0 0:00.05 kworker/6:1
63 root 20 0 0 0 0 S 0.3 0.0 0:00.05 kworker/7:1

[After]
2676 root 20 0 98.6m 416 416 D 9.3 0.0 0:00.87 cat
2626 kamezawa 20 0 15192 1312 920 R 0.3 0.0 0:00.28 top
1 root 20 0 19384 1496 1204 S 0.0 0.0 0:00.66 init
2 root 20 0 0 0 0 S 0.0 0.0 0:00.00 kthreadd
3 root 20 0 0 0 0 S 0.0 0.0 0:00.00 ksoftirqd/0
4 root 20 0 0 0 0 S 0.0 0.0 0:00.00 kworker/0:0

[akpm@linux-foundation.org: make percpu_charge_mutex static, tweak comments]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Tested-by: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/memcontrol.c 54
1 files changed, 38 insertions, 16 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0b1a32cbd74d..c39a177bb641 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -359,7 +359,7 @@ enum charge_type {
359static void mem_cgroup_get(struct mem_cgroup *mem); 359static void mem_cgroup_get(struct mem_cgroup *mem);
360static void mem_cgroup_put(struct mem_cgroup *mem); 360static void mem_cgroup_put(struct mem_cgroup *mem);
361static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); 361static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
362static void drain_all_stock_async(void); 362static void drain_all_stock_async(struct mem_cgroup *mem);
363 363
364static struct mem_cgroup_per_zone * 364static struct mem_cgroup_per_zone *
365mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) 365mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
@@ -1671,7 +1671,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1671 if (victim == root_mem) { 1671 if (victim == root_mem) {
1672 loop++; 1672 loop++;
1673 if (loop >= 1) 1673 if (loop >= 1)
1674 drain_all_stock_async(); 1674 drain_all_stock_async(root_mem);
1675 if (loop >= 2) { 1675 if (loop >= 2) {
1676 /* 1676 /*
1677 * If we have not been able to reclaim 1677 * If we have not been able to reclaim
@@ -1934,9 +1934,11 @@ struct memcg_stock_pcp {
1934 struct mem_cgroup *cached; /* this never be root cgroup */ 1934 struct mem_cgroup *cached; /* this never be root cgroup */
1935 unsigned int nr_pages; 1935 unsigned int nr_pages;
1936 struct work_struct work; 1936 struct work_struct work;
1937 unsigned long flags;
1938#define FLUSHING_CACHED_CHARGE (0)
1937}; 1939};
1938static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); 1940static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
1939static atomic_t memcg_drain_count; 1941static DEFINE_MUTEX(percpu_charge_mutex);
1940 1942
1941/* 1943/*
1942 * Try to consume stocked charge on this cpu. If success, one page is consumed 1944 * Try to consume stocked charge on this cpu. If success, one page is consumed
@@ -1984,6 +1986,7 @@ static void drain_local_stock(struct work_struct *dummy)
1984{ 1986{
1985 struct memcg_stock_pcp *stock = &__get_cpu_var(memcg_stock); 1987 struct memcg_stock_pcp *stock = &__get_cpu_var(memcg_stock);
1986 drain_stock(stock); 1988 drain_stock(stock);
1989 clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
1987} 1990}
1988 1991
1989/* 1992/*
@@ -2008,26 +2011,45 @@ static void refill_stock(struct mem_cgroup *mem, unsigned int nr_pages)
2008 * expects some charges will be back to res_counter later but cannot wait for 2011 * expects some charges will be back to res_counter later but cannot wait for
2009 * it. 2012 * it.
2010 */ 2013 */
2011static void drain_all_stock_async(void) 2014static void drain_all_stock_async(struct mem_cgroup *root_mem)
2012{ 2015{
2013 int cpu; 2016 int cpu, curcpu;
2014 /* This function is for scheduling "drain" in asynchronous way. 2017 /*
2015 * The result of "drain" is not directly handled by callers. Then, 2018 * If someone calls draining, avoid adding more kworker runs.
2016 * if someone is calling drain, we don't have to call drain more.
2017 * Anyway, WORK_STRUCT_PENDING check in queue_work_on() will catch if
2018 * there is a race. We just do loose check here.
2019 */ 2019 */
2020 if (atomic_read(&memcg_drain_count)) 2020 if (!mutex_trylock(&percpu_charge_mutex))
2021 return; 2021 return;
2022 /* Notify other cpus that system-wide "drain" is running */ 2022 /* Notify other cpus that system-wide "drain" is running */
2023 atomic_inc(&memcg_drain_count);
2024 get_online_cpus(); 2023 get_online_cpus();
2024 /*
2025 * Get a hint for avoiding draining charges on the current cpu,
2026 * which must be exhausted by our charging. It is not required that
2027 * this be a precise check, so we use raw_smp_processor_id() instead of
2028 * getcpu()/putcpu().
2029 */
2030 curcpu = raw_smp_processor_id();
2025 for_each_online_cpu(cpu) { 2031 for_each_online_cpu(cpu) {
2026 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); 2032 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
2027 schedule_work_on(cpu, &stock->work); 2033 struct mem_cgroup *mem;
2034
2035 if (cpu == curcpu)
2036 continue;
2037
2038 mem = stock->cached;
2039 if (!mem)
2040 continue;
2041 if (mem != root_mem) {
2042 if (!root_mem->use_hierarchy)
2043 continue;
2044 /* check whether "mem" is under tree of "root_mem" */
2045 if (!css_is_ancestor(&mem->css, &root_mem->css))
2046 continue;
2047 }
2048 if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
2049 schedule_work_on(cpu, &stock->work);
2028 } 2050 }
2029 put_online_cpus(); 2051 put_online_cpus();
2030 atomic_dec(&memcg_drain_count); 2052 mutex_unlock(&percpu_charge_mutex);
2031 /* We don't wait for flush_work */ 2053 /* We don't wait for flush_work */
2032} 2054}
2033 2055
@@ -2035,9 +2057,9 @@ static void drain_all_stock_async(void)
2035static void drain_all_stock_sync(void) 2057static void drain_all_stock_sync(void)
2036{ 2058{
2037 /* called when force_empty is called */ 2059 /* called when force_empty is called */
2038 atomic_inc(&memcg_drain_count); 2060 mutex_lock(&percpu_charge_mutex);
2039 schedule_on_each_cpu(drain_local_stock); 2061 schedule_on_each_cpu(drain_local_stock);
2040 atomic_dec(&memcg_drain_count); 2062 mutex_unlock(&percpu_charge_mutex);
2041} 2063}
2042 2064
2043/* 2065/*