diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2009-04-02 19:57:45 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-02 22:04:56 -0400 |
commit | a3b2d692690aef228e493b1beaafe5364cab3237 (patch) | |
tree | d3ad99c5370462861a1b918b4136e7bd7ad78e52 | |
parent | 3c776e64660028236313f0e54f3a9945764422df (diff) |
cgroups: use css id in swap cgroup for saving memory v5
Use the CSS ID for records in swap_cgroup. With this, on a 64-bit machine,
the size of each swap_cgroup record goes down from 8 bytes to 2 bytes.
This means that with 2GB of swap (assuming a page size of 4096 bytes):
Old size of swap_cgroup = 2G/4k * 8 = 4 Mbytes.
New size of swap_cgroup = 2G/4k * 2 = 1 Mbyte.
Reduction is large. Of course, there are trade-offs. This CSS ID will
add overhead to swap-in/swap-out/swap-free.
But in general,
- swap is a resource which users tend to avoid using.
- If swap is never used, swap_cgroup area is not used.
- According to traditional manuals, the size of swap should be proportional to
the size of memory, and machine memory sizes keep increasing.
I think reducing size of swap_cgroup makes sense.
Note:
- The ID->CSS lookup routine takes no locks; it runs under the RCU read side.
- A memcg can become obsolete at rmdir(), but it is not freed while a refcnt
from swap_cgroup is still held.
Changelog v4->v5:
- reworked on to memcg-charge-swapcache-to-proper-memcg.patch
Changelog ->v4:
- fixed not configured case.
- deleted unnecessary comments.
- fixed NULL pointer bug.
- fixed message in dmesg.
[nishimura@mxp.nes.nec.co.jp: css_tryget can be called twice in !PageCgroupUsed case]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/page_cgroup.h | 13 | ||||
-rw-r--r-- | mm/memcontrol.c | 74 | ||||
-rw-r--r-- | mm/page_cgroup.c | 32 |
3 files changed, 82 insertions, 37 deletions
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 602cc1fdee90..7339c7bf7331 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h | |||
@@ -91,24 +91,23 @@ static inline void page_cgroup_init(void) | |||
91 | 91 | ||
92 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 92 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
93 | #include <linux/swap.h> | 93 | #include <linux/swap.h> |
94 | extern struct mem_cgroup * | 94 | extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id); |
95 | swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem); | 95 | extern unsigned short lookup_swap_cgroup(swp_entry_t ent); |
96 | extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent); | ||
97 | extern int swap_cgroup_swapon(int type, unsigned long max_pages); | 96 | extern int swap_cgroup_swapon(int type, unsigned long max_pages); |
98 | extern void swap_cgroup_swapoff(int type); | 97 | extern void swap_cgroup_swapoff(int type); |
99 | #else | 98 | #else |
100 | #include <linux/swap.h> | 99 | #include <linux/swap.h> |
101 | 100 | ||
102 | static inline | 101 | static inline |
103 | struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) | 102 | unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) |
104 | { | 103 | { |
105 | return NULL; | 104 | return 0; |
106 | } | 105 | } |
107 | 106 | ||
108 | static inline | 107 | static inline |
109 | struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) | 108 | unsigned short lookup_swap_cgroup(swp_entry_t ent) |
110 | { | 109 | { |
111 | return NULL; | 110 | return 0; |
112 | } | 111 | } |
113 | 112 | ||
114 | static inline int | 113 | static inline int |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 81b0ae8183d0..55dea5968464 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -991,10 +991,31 @@ nomem: | |||
991 | return -ENOMEM; | 991 | return -ENOMEM; |
992 | } | 992 | } |
993 | 993 | ||
994 | |||
995 | /* | ||
996 | * A helper function to get mem_cgroup from ID. must be called under | ||
997 | * rcu_read_lock(). The caller must check css_is_removed() or some if | ||
998 | * it's concern. (dropping refcnt from swap can be called against removed | ||
999 | * memcg.) | ||
1000 | */ | ||
1001 | static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) | ||
1002 | { | ||
1003 | struct cgroup_subsys_state *css; | ||
1004 | |||
1005 | /* ID 0 is unused ID */ | ||
1006 | if (!id) | ||
1007 | return NULL; | ||
1008 | css = css_lookup(&mem_cgroup_subsys, id); | ||
1009 | if (!css) | ||
1010 | return NULL; | ||
1011 | return container_of(css, struct mem_cgroup, css); | ||
1012 | } | ||
1013 | |||
994 | static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page) | 1014 | static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page) |
995 | { | 1015 | { |
996 | struct mem_cgroup *mem; | 1016 | struct mem_cgroup *mem; |
997 | struct page_cgroup *pc; | 1017 | struct page_cgroup *pc; |
1018 | unsigned short id; | ||
998 | swp_entry_t ent; | 1019 | swp_entry_t ent; |
999 | 1020 | ||
1000 | VM_BUG_ON(!PageLocked(page)); | 1021 | VM_BUG_ON(!PageLocked(page)); |
@@ -1006,16 +1027,19 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page) | |||
1006 | /* | 1027 | /* |
1007 | * Used bit of swapcache is solid under page lock. | 1028 | * Used bit of swapcache is solid under page lock. |
1008 | */ | 1029 | */ |
1009 | if (PageCgroupUsed(pc)) | 1030 | if (PageCgroupUsed(pc)) { |
1010 | mem = pc->mem_cgroup; | 1031 | mem = pc->mem_cgroup; |
1011 | else { | 1032 | if (mem && !css_tryget(&mem->css)) |
1033 | mem = NULL; | ||
1034 | } else { | ||
1012 | ent.val = page_private(page); | 1035 | ent.val = page_private(page); |
1013 | mem = lookup_swap_cgroup(ent); | 1036 | id = lookup_swap_cgroup(ent); |
1037 | rcu_read_lock(); | ||
1038 | mem = mem_cgroup_lookup(id); | ||
1039 | if (mem && !css_tryget(&mem->css)) | ||
1040 | mem = NULL; | ||
1041 | rcu_read_unlock(); | ||
1014 | } | 1042 | } |
1015 | if (!mem) | ||
1016 | return NULL; | ||
1017 | if (!css_tryget(&mem->css)) | ||
1018 | return NULL; | ||
1019 | return mem; | 1043 | return mem; |
1020 | } | 1044 | } |
1021 | 1045 | ||
@@ -1276,12 +1300,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | |||
1276 | 1300 | ||
1277 | if (do_swap_account && !ret && PageSwapCache(page)) { | 1301 | if (do_swap_account && !ret && PageSwapCache(page)) { |
1278 | swp_entry_t ent = {.val = page_private(page)}; | 1302 | swp_entry_t ent = {.val = page_private(page)}; |
1303 | unsigned short id; | ||
1279 | /* avoid double counting */ | 1304 | /* avoid double counting */ |
1280 | mem = swap_cgroup_record(ent, NULL); | 1305 | id = swap_cgroup_record(ent, 0); |
1306 | rcu_read_lock(); | ||
1307 | mem = mem_cgroup_lookup(id); | ||
1281 | if (mem) { | 1308 | if (mem) { |
1309 | /* | ||
1310 | * We did swap-in. Then, this entry is doubly counted | ||
1311 | * both in mem and memsw. We uncharge it, here. | ||
1312 | * Recorded ID can be obsolete. We avoid calling | ||
1313 | * css_tryget() | ||
1314 | */ | ||
1282 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | 1315 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
1283 | mem_cgroup_put(mem); | 1316 | mem_cgroup_put(mem); |
1284 | } | 1317 | } |
1318 | rcu_read_unlock(); | ||
1285 | } | 1319 | } |
1286 | return ret; | 1320 | return ret; |
1287 | } | 1321 | } |
@@ -1346,13 +1380,21 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) | |||
1346 | */ | 1380 | */ |
1347 | if (do_swap_account && PageSwapCache(page)) { | 1381 | if (do_swap_account && PageSwapCache(page)) { |
1348 | swp_entry_t ent = {.val = page_private(page)}; | 1382 | swp_entry_t ent = {.val = page_private(page)}; |
1383 | unsigned short id; | ||
1349 | struct mem_cgroup *memcg; | 1384 | struct mem_cgroup *memcg; |
1350 | memcg = swap_cgroup_record(ent, NULL); | 1385 | |
1386 | id = swap_cgroup_record(ent, 0); | ||
1387 | rcu_read_lock(); | ||
1388 | memcg = mem_cgroup_lookup(id); | ||
1351 | if (memcg) { | 1389 | if (memcg) { |
1390 | /* | ||
1391 | * This recorded memcg can be obsolete one. So, avoid | ||
1392 | * calling css_tryget | ||
1393 | */ | ||
1352 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); | 1394 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); |
1353 | mem_cgroup_put(memcg); | 1395 | mem_cgroup_put(memcg); |
1354 | } | 1396 | } |
1355 | 1397 | rcu_read_unlock(); | |
1356 | } | 1398 | } |
1357 | /* add this page(page_cgroup) to the LRU we want. */ | 1399 | /* add this page(page_cgroup) to the LRU we want. */ |
1358 | 1400 | ||
@@ -1473,7 +1515,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) | |||
1473 | MEM_CGROUP_CHARGE_TYPE_SWAPOUT); | 1515 | MEM_CGROUP_CHARGE_TYPE_SWAPOUT); |
1474 | /* record memcg information */ | 1516 | /* record memcg information */ |
1475 | if (do_swap_account && memcg) { | 1517 | if (do_swap_account && memcg) { |
1476 | swap_cgroup_record(ent, memcg); | 1518 | swap_cgroup_record(ent, css_id(&memcg->css)); |
1477 | mem_cgroup_get(memcg); | 1519 | mem_cgroup_get(memcg); |
1478 | } | 1520 | } |
1479 | if (memcg) | 1521 | if (memcg) |
@@ -1488,15 +1530,23 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) | |||
1488 | void mem_cgroup_uncharge_swap(swp_entry_t ent) | 1530 | void mem_cgroup_uncharge_swap(swp_entry_t ent) |
1489 | { | 1531 | { |
1490 | struct mem_cgroup *memcg; | 1532 | struct mem_cgroup *memcg; |
1533 | unsigned short id; | ||
1491 | 1534 | ||
1492 | if (!do_swap_account) | 1535 | if (!do_swap_account) |
1493 | return; | 1536 | return; |
1494 | 1537 | ||
1495 | memcg = swap_cgroup_record(ent, NULL); | 1538 | id = swap_cgroup_record(ent, 0); |
1539 | rcu_read_lock(); | ||
1540 | memcg = mem_cgroup_lookup(id); | ||
1496 | if (memcg) { | 1541 | if (memcg) { |
1542 | /* | ||
1543 | * We uncharge this because swap is freed. | ||
1544 | * This memcg can be obsolete one. We avoid calling css_tryget | ||
1545 | */ | ||
1497 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); | 1546 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); |
1498 | mem_cgroup_put(memcg); | 1547 | mem_cgroup_put(memcg); |
1499 | } | 1548 | } |
1549 | rcu_read_unlock(); | ||
1500 | } | 1550 | } |
1501 | #endif | 1551 | #endif |
1502 | 1552 | ||
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index ceecfbb143fa..ebf81074bed4 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
@@ -285,12 +285,8 @@ struct swap_cgroup_ctrl { | |||
285 | 285 | ||
286 | struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; | 286 | struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; |
287 | 287 | ||
288 | /* | ||
289 | * This 8bytes seems big..maybe we can reduce this when we can use "id" for | ||
290 | * cgroup rather than pointer. | ||
291 | */ | ||
292 | struct swap_cgroup { | 288 | struct swap_cgroup { |
293 | struct mem_cgroup *val; | 289 | unsigned short id; |
294 | }; | 290 | }; |
295 | #define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) | 291 | #define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) |
296 | #define SC_POS_MASK (SC_PER_PAGE - 1) | 292 | #define SC_POS_MASK (SC_PER_PAGE - 1) |
@@ -342,10 +338,10 @@ not_enough_page: | |||
342 | * @ent: swap entry to be recorded into | 338 | * @ent: swap entry to be recorded into |
343 | * @mem: mem_cgroup to be recorded | 339 | * @mem: mem_cgroup to be recorded |
344 | * | 340 | * |
345 | * Returns old value at success, NULL at failure. | 341 | * Returns old value at success, 0 at failure. |
346 | * (Of course, old value can be NULL.) | 342 | * (Of course, old value can be 0.) |
347 | */ | 343 | */ |
348 | struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) | 344 | unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) |
349 | { | 345 | { |
350 | int type = swp_type(ent); | 346 | int type = swp_type(ent); |
351 | unsigned long offset = swp_offset(ent); | 347 | unsigned long offset = swp_offset(ent); |
@@ -354,18 +350,18 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) | |||
354 | struct swap_cgroup_ctrl *ctrl; | 350 | struct swap_cgroup_ctrl *ctrl; |
355 | struct page *mappage; | 351 | struct page *mappage; |
356 | struct swap_cgroup *sc; | 352 | struct swap_cgroup *sc; |
357 | struct mem_cgroup *old; | 353 | unsigned short old; |
358 | 354 | ||
359 | if (!do_swap_account) | 355 | if (!do_swap_account) |
360 | return NULL; | 356 | return 0; |
361 | 357 | ||
362 | ctrl = &swap_cgroup_ctrl[type]; | 358 | ctrl = &swap_cgroup_ctrl[type]; |
363 | 359 | ||
364 | mappage = ctrl->map[idx]; | 360 | mappage = ctrl->map[idx]; |
365 | sc = page_address(mappage); | 361 | sc = page_address(mappage); |
366 | sc += pos; | 362 | sc += pos; |
367 | old = sc->val; | 363 | old = sc->id; |
368 | sc->val = mem; | 364 | sc->id = id; |
369 | 365 | ||
370 | return old; | 366 | return old; |
371 | } | 367 | } |
@@ -374,9 +370,9 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) | |||
374 | * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry | 370 | * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry |
375 | * @ent: swap entry to be looked up. | 371 | * @ent: swap entry to be looked up. |
376 | * | 372 | * |
377 | * Returns pointer to mem_cgroup at success. NULL at failure. | 373 | * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID) |
378 | */ | 374 | */ |
379 | struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) | 375 | unsigned short lookup_swap_cgroup(swp_entry_t ent) |
380 | { | 376 | { |
381 | int type = swp_type(ent); | 377 | int type = swp_type(ent); |
382 | unsigned long offset = swp_offset(ent); | 378 | unsigned long offset = swp_offset(ent); |
@@ -385,16 +381,16 @@ struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) | |||
385 | struct swap_cgroup_ctrl *ctrl; | 381 | struct swap_cgroup_ctrl *ctrl; |
386 | struct page *mappage; | 382 | struct page *mappage; |
387 | struct swap_cgroup *sc; | 383 | struct swap_cgroup *sc; |
388 | struct mem_cgroup *ret; | 384 | unsigned short ret; |
389 | 385 | ||
390 | if (!do_swap_account) | 386 | if (!do_swap_account) |
391 | return NULL; | 387 | return 0; |
392 | 388 | ||
393 | ctrl = &swap_cgroup_ctrl[type]; | 389 | ctrl = &swap_cgroup_ctrl[type]; |
394 | mappage = ctrl->map[idx]; | 390 | mappage = ctrl->map[idx]; |
395 | sc = page_address(mappage); | 391 | sc = page_address(mappage); |
396 | sc += pos; | 392 | sc += pos; |
397 | ret = sc->val; | 393 | ret = sc->id; |
398 | return ret; | 394 | return ret; |
399 | } | 395 | } |
400 | 396 | ||
@@ -432,7 +428,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages) | |||
432 | 428 | ||
433 | printk(KERN_INFO | 429 | printk(KERN_INFO |
434 | "swap_cgroup: uses %ld bytes of vmalloc for pointer array space" | 430 | "swap_cgroup: uses %ld bytes of vmalloc for pointer array space" |
435 | " and %ld bytes to hold mem_cgroup pointers on swap\n", | 431 | " and %ld bytes to hold mem_cgroup information per swap ents\n", |
436 | array_size, length * PAGE_SIZE); | 432 | array_size, length * PAGE_SIZE); |
437 | printk(KERN_INFO | 433 | printk(KERN_INFO |
438 | "swap_cgroup can be disabled by noswapaccount boot option.\n"); | 434 | "swap_cgroup can be disabled by noswapaccount boot option.\n"); |