aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2009-04-02 19:57:45 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-04-02 22:04:56 -0400
commit: a3b2d692690aef228e493b1beaafe5364cab3237 (patch)
tree: d3ad99c5370462861a1b918b4136e7bd7ad78e52
parent: 3c776e64660028236313f0e54f3a9945764422df (diff)
cgroups: use css id in swap cgroup for saving memory v5
Try to use CSS ID for records in swap_cgroup. With this, on a 64-bit machine, the size of a swap_cgroup record goes down from 8 bytes to 2 bytes. This means that when 2GB of swap is equipped (assuming the page size is 4096 bytes): From: size of swap_cgroup = 2G/4k * 8 = 4 Mbytes. To: size of swap_cgroup = 2G/4k * 2 = 1 Mbyte. The reduction is large. Of course, there are trade-offs. This CSS ID will add overhead to swap-in/swap-out/swap-free. But in general, - swap is a resource which users tend to avoid using. - If swap is never used, the swap_cgroup area is not used. - According to traditional manuals, the size of swap should be proportional to the size of memory. The memory size of machines is increasing now. I think reducing the size of swap_cgroup makes sense. Note: - The ID->CSS lookup routine takes no locks; it runs under the RCU read side. - A memcg can be obsolete at rmdir() but is not freed while a refcnt from swap_cgroup is held. Changelog v4->v5: - reworked on top of memcg-charge-swapcache-to-proper-memcg.patch Changelog ->v4: - fixed not configured case. - deleted unnecessary comments. - fixed NULL pointer bug. - fixed message in dmesg. [nishimura@mxp.nes.nec.co.jp: css_tryget can be called twice in !PageCgroupUsed case] Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Paul Menage <menage@google.com> Cc: Hugh Dickins <hugh@veritas.com> Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/page_cgroup.h 13
-rw-r--r-- mm/memcontrol.c 74
-rw-r--r-- mm/page_cgroup.c 32
3 files changed, 82 insertions, 37 deletions
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 602cc1fdee90..7339c7bf7331 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -91,24 +91,23 @@ static inline void page_cgroup_init(void)
91 91
92#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 92#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
93#include <linux/swap.h> 93#include <linux/swap.h>
94extern struct mem_cgroup * 94extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
95swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem); 95extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
96extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
97extern int swap_cgroup_swapon(int type, unsigned long max_pages); 96extern int swap_cgroup_swapon(int type, unsigned long max_pages);
98extern void swap_cgroup_swapoff(int type); 97extern void swap_cgroup_swapoff(int type);
99#else 98#else
100#include <linux/swap.h> 99#include <linux/swap.h>
101 100
102static inline 101static inline
103struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) 102unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
104{ 103{
105 return NULL; 104 return 0;
106} 105}
107 106
108static inline 107static inline
109struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) 108unsigned short lookup_swap_cgroup(swp_entry_t ent)
110{ 109{
111 return NULL; 110 return 0;
112} 111}
113 112
114static inline int 113static inline int
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 81b0ae8183d0..55dea5968464 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -991,10 +991,31 @@ nomem:
991 return -ENOMEM; 991 return -ENOMEM;
992} 992}
993 993
994
995/*
996 * A helper function to get mem_cgroup from ID. must be called under
997 * rcu_read_lock(). The caller must check css_is_removed() or some if
998 * it's concern. (dropping refcnt from swap can be called against removed
999 * memcg.)
1000 */
1001static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
1002{
1003 struct cgroup_subsys_state *css;
1004
1005 /* ID 0 is unused ID */
1006 if (!id)
1007 return NULL;
1008 css = css_lookup(&mem_cgroup_subsys, id);
1009 if (!css)
1010 return NULL;
1011 return container_of(css, struct mem_cgroup, css);
1012}
1013
994static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page) 1014static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
995{ 1015{
996 struct mem_cgroup *mem; 1016 struct mem_cgroup *mem;
997 struct page_cgroup *pc; 1017 struct page_cgroup *pc;
1018 unsigned short id;
998 swp_entry_t ent; 1019 swp_entry_t ent;
999 1020
1000 VM_BUG_ON(!PageLocked(page)); 1021 VM_BUG_ON(!PageLocked(page));
@@ -1006,16 +1027,19 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
1006 /* 1027 /*
1007 * Used bit of swapcache is solid under page lock. 1028 * Used bit of swapcache is solid under page lock.
1008 */ 1029 */
1009 if (PageCgroupUsed(pc)) 1030 if (PageCgroupUsed(pc)) {
1010 mem = pc->mem_cgroup; 1031 mem = pc->mem_cgroup;
1011 else { 1032 if (mem && !css_tryget(&mem->css))
1033 mem = NULL;
1034 } else {
1012 ent.val = page_private(page); 1035 ent.val = page_private(page);
1013 mem = lookup_swap_cgroup(ent); 1036 id = lookup_swap_cgroup(ent);
1037 rcu_read_lock();
1038 mem = mem_cgroup_lookup(id);
1039 if (mem && !css_tryget(&mem->css))
1040 mem = NULL;
1041 rcu_read_unlock();
1014 } 1042 }
1015 if (!mem)
1016 return NULL;
1017 if (!css_tryget(&mem->css))
1018 return NULL;
1019 return mem; 1043 return mem;
1020} 1044}
1021 1045
@@ -1276,12 +1300,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
1276 1300
1277 if (do_swap_account && !ret && PageSwapCache(page)) { 1301 if (do_swap_account && !ret && PageSwapCache(page)) {
1278 swp_entry_t ent = {.val = page_private(page)}; 1302 swp_entry_t ent = {.val = page_private(page)};
1303 unsigned short id;
1279 /* avoid double counting */ 1304 /* avoid double counting */
1280 mem = swap_cgroup_record(ent, NULL); 1305 id = swap_cgroup_record(ent, 0);
1306 rcu_read_lock();
1307 mem = mem_cgroup_lookup(id);
1281 if (mem) { 1308 if (mem) {
1309 /*
1310 * We did swap-in. Then, this entry is doubly counted
1311 * both in mem and memsw. We uncharge it, here.
1312 * Recorded ID can be obsolete. We avoid calling
1313 * css_tryget()
1314 */
1282 res_counter_uncharge(&mem->memsw, PAGE_SIZE); 1315 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1283 mem_cgroup_put(mem); 1316 mem_cgroup_put(mem);
1284 } 1317 }
1318 rcu_read_unlock();
1285 } 1319 }
1286 return ret; 1320 return ret;
1287} 1321}
@@ -1346,13 +1380,21 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
1346 */ 1380 */
1347 if (do_swap_account && PageSwapCache(page)) { 1381 if (do_swap_account && PageSwapCache(page)) {
1348 swp_entry_t ent = {.val = page_private(page)}; 1382 swp_entry_t ent = {.val = page_private(page)};
1383 unsigned short id;
1349 struct mem_cgroup *memcg; 1384 struct mem_cgroup *memcg;
1350 memcg = swap_cgroup_record(ent, NULL); 1385
1386 id = swap_cgroup_record(ent, 0);
1387 rcu_read_lock();
1388 memcg = mem_cgroup_lookup(id);
1351 if (memcg) { 1389 if (memcg) {
1390 /*
1391 * This recorded memcg can be obsolete one. So, avoid
1392 * calling css_tryget
1393 */
1352 res_counter_uncharge(&memcg->memsw, PAGE_SIZE); 1394 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
1353 mem_cgroup_put(memcg); 1395 mem_cgroup_put(memcg);
1354 } 1396 }
1355 1397 rcu_read_unlock();
1356 } 1398 }
1357 /* add this page(page_cgroup) to the LRU we want. */ 1399 /* add this page(page_cgroup) to the LRU we want. */
1358 1400
@@ -1473,7 +1515,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
1473 MEM_CGROUP_CHARGE_TYPE_SWAPOUT); 1515 MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
1474 /* record memcg information */ 1516 /* record memcg information */
1475 if (do_swap_account && memcg) { 1517 if (do_swap_account && memcg) {
1476 swap_cgroup_record(ent, memcg); 1518 swap_cgroup_record(ent, css_id(&memcg->css));
1477 mem_cgroup_get(memcg); 1519 mem_cgroup_get(memcg);
1478 } 1520 }
1479 if (memcg) 1521 if (memcg)
@@ -1488,15 +1530,23 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
1488void mem_cgroup_uncharge_swap(swp_entry_t ent) 1530void mem_cgroup_uncharge_swap(swp_entry_t ent)
1489{ 1531{
1490 struct mem_cgroup *memcg; 1532 struct mem_cgroup *memcg;
1533 unsigned short id;
1491 1534
1492 if (!do_swap_account) 1535 if (!do_swap_account)
1493 return; 1536 return;
1494 1537
1495 memcg = swap_cgroup_record(ent, NULL); 1538 id = swap_cgroup_record(ent, 0);
1539 rcu_read_lock();
1540 memcg = mem_cgroup_lookup(id);
1496 if (memcg) { 1541 if (memcg) {
1542 /*
1543 * We uncharge this because swap is freed.
1544 * This memcg can be obsolete one. We avoid calling css_tryget
1545 */
1497 res_counter_uncharge(&memcg->memsw, PAGE_SIZE); 1546 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
1498 mem_cgroup_put(memcg); 1547 mem_cgroup_put(memcg);
1499 } 1548 }
1549 rcu_read_unlock();
1500} 1550}
1501#endif 1551#endif
1502 1552
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index ceecfbb143fa..ebf81074bed4 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -285,12 +285,8 @@ struct swap_cgroup_ctrl {
285 285
286struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; 286struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
287 287
288/*
289 * This 8bytes seems big..maybe we can reduce this when we can use "id" for
290 * cgroup rather than pointer.
291 */
292struct swap_cgroup { 288struct swap_cgroup {
293 struct mem_cgroup *val; 289 unsigned short id;
294}; 290};
295#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) 291#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup))
296#define SC_POS_MASK (SC_PER_PAGE - 1) 292#define SC_POS_MASK (SC_PER_PAGE - 1)
@@ -342,10 +338,10 @@ not_enough_page:
342 * @ent: swap entry to be recorded into 338 * @ent: swap entry to be recorded into
343 * @mem: mem_cgroup to be recorded 339 * @mem: mem_cgroup to be recorded
344 * 340 *
345 * Returns old value at success, NULL at failure. 341 * Returns old value at success, 0 at failure.
346 * (Of course, old value can be NULL.) 342 * (Of course, old value can be 0.)
347 */ 343 */
348struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) 344unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
349{ 345{
350 int type = swp_type(ent); 346 int type = swp_type(ent);
351 unsigned long offset = swp_offset(ent); 347 unsigned long offset = swp_offset(ent);
@@ -354,18 +350,18 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
354 struct swap_cgroup_ctrl *ctrl; 350 struct swap_cgroup_ctrl *ctrl;
355 struct page *mappage; 351 struct page *mappage;
356 struct swap_cgroup *sc; 352 struct swap_cgroup *sc;
357 struct mem_cgroup *old; 353 unsigned short old;
358 354
359 if (!do_swap_account) 355 if (!do_swap_account)
360 return NULL; 356 return 0;
361 357
362 ctrl = &swap_cgroup_ctrl[type]; 358 ctrl = &swap_cgroup_ctrl[type];
363 359
364 mappage = ctrl->map[idx]; 360 mappage = ctrl->map[idx];
365 sc = page_address(mappage); 361 sc = page_address(mappage);
366 sc += pos; 362 sc += pos;
367 old = sc->val; 363 old = sc->id;
368 sc->val = mem; 364 sc->id = id;
369 365
370 return old; 366 return old;
371} 367}
@@ -374,9 +370,9 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
374 * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry 370 * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
375 * @ent: swap entry to be looked up. 371 * @ent: swap entry to be looked up.
376 * 372 *
377 * Returns pointer to mem_cgroup at success. NULL at failure. 373 * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
378 */ 374 */
379struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) 375unsigned short lookup_swap_cgroup(swp_entry_t ent)
380{ 376{
381 int type = swp_type(ent); 377 int type = swp_type(ent);
382 unsigned long offset = swp_offset(ent); 378 unsigned long offset = swp_offset(ent);
@@ -385,16 +381,16 @@ struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
385 struct swap_cgroup_ctrl *ctrl; 381 struct swap_cgroup_ctrl *ctrl;
386 struct page *mappage; 382 struct page *mappage;
387 struct swap_cgroup *sc; 383 struct swap_cgroup *sc;
388 struct mem_cgroup *ret; 384 unsigned short ret;
389 385
390 if (!do_swap_account) 386 if (!do_swap_account)
391 return NULL; 387 return 0;
392 388
393 ctrl = &swap_cgroup_ctrl[type]; 389 ctrl = &swap_cgroup_ctrl[type];
394 mappage = ctrl->map[idx]; 390 mappage = ctrl->map[idx];
395 sc = page_address(mappage); 391 sc = page_address(mappage);
396 sc += pos; 392 sc += pos;
397 ret = sc->val; 393 ret = sc->id;
398 return ret; 394 return ret;
399} 395}
400 396
@@ -432,7 +428,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages)
432 428
433 printk(KERN_INFO 429 printk(KERN_INFO
434 "swap_cgroup: uses %ld bytes of vmalloc for pointer array space" 430 "swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
435 " and %ld bytes to hold mem_cgroup pointers on swap\n", 431 " and %ld bytes to hold mem_cgroup information per swap ents\n",
436 array_size, length * PAGE_SIZE); 432 array_size, length * PAGE_SIZE);
437 printk(KERN_INFO 433 printk(KERN_INFO
438 "swap_cgroup can be disabled by noswapaccount boot option.\n"); 434 "swap_cgroup can be disabled by noswapaccount boot option.\n");