author    KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>  2009-12-15 19:47:03 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>      2009-12-16 10:20:07 -0500
commit    569b846df54ffb2827b83ce3244c5f032394cba4 (patch)
tree      77c5d373a5edf97710fab8777912971b99e84828 /mm/truncate.c
parent    cd9b45b78a61e8df250e69385c74e729e5b66abf (diff)
memcg: coalesce uncharge during unmap/truncate
In a massively parallel environment, res_counter can be a performance
bottleneck. One strong technique for reducing lock contention is to reduce
the number of calls by coalescing several calls into one.

Considering charge/uncharge characteristics:
 - charge is done one by one via demand-paging.
 - uncharge is done
   - in chunks at munmap, truncate, exit, execve...
   - one by one via vmscan/paging.

So we have a chance to coalesce uncharges to improve scalability at
unmap/truncation.

This patch implements coalescing for uncharge. To avoid scattering memcg's
structures into functions under /mm, it adds memcg batch-uncharge
information to the task. The reason for per-task batching is to make use of
the caller's context information: we do batched (delayed) uncharge when
truncation/unmap occurs, but direct uncharge when the uncharge is called by
memory reclaim (vmscan.c).

The degree of coalescing depends on the caller:
 - at invalidate/truncate: the pagevec size
 - at unmap: ZAP_BLOCK_SIZE
(memory itself is freed at this granularity), so we won't coalesce too much.

On an x86-64 8-CPU server, I measured memcg's overhead at page fault by
running a program which does map/fault/unmap in a loop, one task per CPU
via taskset, and summing the number of page faults over 60 seconds:

[without memcg config]
  40156968  page-faults   #  0.085 M/sec  ( +- 0.046% )
  27.67 cache-miss/faults
[root cgroup]
  36659599  page-faults   #  0.077 M/sec  ( +- 0.247% )
  31.58 miss/faults
[in a child cgroup]
  18444157  page-faults   #  0.039 M/sec  ( +- 0.133% )
  69.96 miss/faults
[child with this patch]
  27133719  page-faults   #  0.057 M/sec  ( +- 0.155% )
  47.16 miss/faults

We can see a fair amount of improvement. (The root cgroup is not affected
by this patch.) Another patch for "charge" will follow this, and the above
will improve further.

Changelog (since 2009/10/02):
 - renamed fields of memcg_batch (pages to bytes, memsw to memsw_bytes)
 - some cleanups and commentary/description updates.
 - added initialization code to copy_process(). (possible bug fix)

Changelog (old):
 - fixed the !CONFIG_MEM_CGROUP case.
 - rebased onto the latest mmotm + softlimit fix patches.
 - unified patch for callers.
 - added comments.
 - made ->do_batch a bool.
 - removed css_get() et al. We don't need it.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
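To make the batching shape concrete, here is a minimal userspace sketch of
the idea, not the kernel code: mem_cgroup_uncharge_start()/_end() are the
interface this patch adds, and the ->do_batch flag plus a byte counter
follow the changelog's hints, but the res_counter stand-in, the 4096
PAGE_SIZE constant, and all other details are simplifying assumptions for
illustration only.

#include <stdio.h>
#include <pthread.h>

/* Stand-in for res_counter: a lock-protected usage count.
 * The lock models the contention the patch is reducing. */
static pthread_mutex_t counter_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long usage_bytes = 1UL << 30;   /* pretend 1GB is charged */

static void res_counter_uncharge(unsigned long bytes)
{
        pthread_mutex_lock(&counter_lock);      /* the contended hot path */
        usage_bytes -= bytes;
        pthread_mutex_unlock(&counter_lock);
}

/* Per-task batch state, analogous to the memcg batch-uncharge info this
 * patch hangs off the task (field names here are assumptions). */
static __thread struct {
        int do_batch;           /* is batching active for this task? */
        unsigned long bytes;    /* uncharges accumulated so far */
} memcg_batch;

void mem_cgroup_uncharge_start(void)
{
        memcg_batch.do_batch = 1;
        memcg_batch.bytes = 0;
}

void mem_cgroup_uncharge_end(void)
{
        memcg_batch.do_batch = 0;
        if (memcg_batch.bytes)
                res_counter_uncharge(memcg_batch.bytes); /* one locked op */
}

/* Called once per page; 4096 models PAGE_SIZE. */
void mem_cgroup_uncharge_page(void)
{
        if (memcg_batch.do_batch)
                memcg_batch.bytes += 4096;      /* coalesce, no lock taken */
        else
                res_counter_uncharge(4096);     /* direct, e.g. from reclaim */
}

int main(void)
{
        int i;

        /* A truncate-like caller: one lock round-trip for 16 pages
         * instead of 16 round-trips. */
        mem_cgroup_uncharge_start();
        for (i = 0; i < 16; i++)
                mem_cgroup_uncharge_page();
        mem_cgroup_uncharge_end();

        printf("usage now %lu bytes\n", usage_bytes);
        return 0;
}

In the diff below, each start/end pair brackets one pagevec worth of pages,
so at most PAGEVEC_SIZE uncharges are coalesced before a flush, which is the
bounded "degree of coalescing" the message describes.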
Diffstat (limited to 'mm/truncate.c')
-rw-r--r--    mm/truncate.c    6
1 file changed, 6 insertions(+), 0 deletions(-)
diff --git a/mm/truncate.c b/mm/truncate.c
index 2c147a7e5f2c..342deee22684 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -272,6 +272,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			pagevec_release(&pvec);
 			break;
 		}
+		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -286,6 +287,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			unlock_page(page);
 		}
 		pagevec_release(&pvec);
+		mem_cgroup_uncharge_end();
 	}
 }
 EXPORT_SYMBOL(truncate_inode_pages_range);
@@ -327,6 +329,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 	pagevec_init(&pvec, 0);
 	while (next <= end &&
 			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 			pgoff_t index;
@@ -354,6 +357,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			break;
 		}
 		pagevec_release(&pvec);
+		mem_cgroup_uncharge_end();
 		cond_resched();
 	}
 	return ret;
@@ -428,6 +432,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 	while (next <= end && !wrapped &&
 		pagevec_lookup(&pvec, mapping, next,
 			min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 			pgoff_t page_index;
@@ -477,6 +482,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 			unlock_page(page);
 		}
 		pagevec_release(&pvec);
+		mem_cgroup_uncharge_end();
 		cond_resched();
 	}
 	return ret;