diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2009-12-15 19:47:03 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-16 10:20:07 -0500 |
commit | 569b846df54ffb2827b83ce3244c5f032394cba4 (patch) | |
tree | 77c5d373a5edf97710fab8777912971b99e84828 /include | |
parent | cd9b45b78a61e8df250e69385c74e729e5b66abf (diff) |
memcg: coalesce uncharge during unmap/truncate
In a massively parallel environment, res_counter can be a performance
bottleneck. One strong technique to reduce lock contention is reducing
calls by coalescing some number of calls into one.
Considering charge/uncharge characteristics,
- charge is done one by one via demand-paging.
- uncharge is done by
- in chunk at munmap, truncate, exit, execve...
- one by one via vmscan/paging.
It seems we have a chance to coalesce uncharges for improving scalability
at unmap/truncation.
This patch is for coalescing uncharges. To avoid scattering memcg's
structures across functions under /mm, this patch adds memcg batch uncharge
information to the task. The reason for per-task batching is to make use
of the caller's context information. We do batched uncharge (delayed
uncharge) when truncation/unmap occurs but do direct uncharge when
uncharge is called by memory reclaim (vmscan.c).
The degree of coalescing depends on callers
- at invalidate/truncate... pagevec size
- at unmap ....ZAP_BLOCK_SIZE
(memory itself will be freed in this degree.)
So we will not coalesce too much.
On an x86-64 8-cpu server, I tested the overhead of memcg at page fault by
running a program which does map/fault/unmap in a loop, running
one task per cpu with taskset and taking the sum of the number of page faults
in 60secs.
[without memcg config]
40156968 page-faults # 0.085 M/sec ( +- 0.046% )
27.67 cache-miss/faults
[root cgroup]
36659599 page-faults # 0.077 M/sec ( +- 0.247% )
31.58 miss/faults
[in a child cgroup]
18444157 page-faults # 0.039 M/sec ( +- 0.133% )
69.96 miss/faults
[child with this patch]
27133719 page-faults # 0.057 M/sec ( +- 0.155% )
47.16 miss/faults
We can see some amounts of improvement.
(The root cgroup isn't affected by this patch.)
Another patch for "charge" will follow this and above will be improved more.
Changelog(since 2009/10/02):
- renamed fields of memcg_batch (pages to bytes, memsw to memsw_bytes)
- some clean up and commentary/description updates.
- added initialize code to copy_process(). (possible bug fix)
Changelog(old):
- fixed !CONFIG_MEM_CGROUP case.
- rebased onto the latest mmotm + softlimit fix patches.
- unified patch for callers
- added comments.
- make ->do_batch as bool.
- removed css_get() et al. We don't need it.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/memcontrol.h | 13 | ||||
-rw-r--r-- | include/linux/sched.h | 8 |
2 files changed, 21 insertions, 0 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bf9213b2db8f..91300c972e76 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -54,6 +54,11 @@ extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru); | |||
54 | extern void mem_cgroup_del_lru(struct page *page); | 54 | extern void mem_cgroup_del_lru(struct page *page); |
55 | extern void mem_cgroup_move_lists(struct page *page, | 55 | extern void mem_cgroup_move_lists(struct page *page, |
56 | enum lru_list from, enum lru_list to); | 56 | enum lru_list from, enum lru_list to); |
57 | |||
58 | /* For coalescing uncharge for reducing memcg' overhead*/ | ||
59 | extern void mem_cgroup_uncharge_start(void); | ||
60 | extern void mem_cgroup_uncharge_end(void); | ||
61 | |||
57 | extern void mem_cgroup_uncharge_page(struct page *page); | 62 | extern void mem_cgroup_uncharge_page(struct page *page); |
58 | extern void mem_cgroup_uncharge_cache_page(struct page *page); | 63 | extern void mem_cgroup_uncharge_cache_page(struct page *page); |
59 | extern int mem_cgroup_shmem_charge_fallback(struct page *page, | 64 | extern int mem_cgroup_shmem_charge_fallback(struct page *page, |
@@ -151,6 +156,14 @@ static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr) | |||
151 | { | 156 | { |
152 | } | 157 | } |
153 | 158 | ||
159 | static inline void mem_cgroup_uncharge_start(void) | ||
160 | { | ||
161 | } | ||
162 | |||
163 | static inline void mem_cgroup_uncharge_end(void) | ||
164 | { | ||
165 | } | ||
166 | |||
154 | static inline void mem_cgroup_uncharge_page(struct page *page) | 167 | static inline void mem_cgroup_uncharge_page(struct page *page) |
155 | { | 168 | { |
156 | } | 169 | } |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c858f38e81a..f4c145410a8d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1544,6 +1544,14 @@ struct task_struct { | |||
1544 | unsigned long trace_recursion; | 1544 | unsigned long trace_recursion; |
1545 | #endif /* CONFIG_TRACING */ | 1545 | #endif /* CONFIG_TRACING */ |
1546 | unsigned long stack_start; | 1546 | unsigned long stack_start; |
1547 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ | ||
1548 | struct memcg_batch_info { | ||
1549 | int do_batch; /* incremented when batch uncharge started */ | ||
1550 | struct mem_cgroup *memcg; /* target memcg of uncharge */ | ||
1551 | unsigned long bytes; /* uncharged usage */ | ||
1552 | unsigned long memsw_bytes; /* uncharged mem+swap usage */ | ||
1553 | } memcg_batch; | ||
1554 | #endif | ||
1547 | }; | 1555 | }; |
1548 | 1556 | ||
1549 | /* Future-safe accessor for struct task_struct's cpus_allowed. */ | 1557 | /* Future-safe accessor for struct task_struct's cpus_allowed. */ |