aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2009-01-07 21:07:58 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-08 11:31:05 -0500
commit27a7faa0779dd13729196c1a818c294f44bbd1ee (patch)
tree30837689bf39eb734a8917f2c912e1b8ac0c28c0 /mm
parentc077719be8e9e6b55702117513d1b5f41d80404a (diff)
memcg: swap cgroup for remembering usage
For accounting swap, we need a record per swap entry, at least. This patch adds the following functions. - swap_cgroup_swapon() .... called from swapon - swap_cgroup_swapoff() ... called at the end of swapoff. - swap_cgroup_record() .... record information of swap entry. - lookup_swap_cgroup() .... lookup information of swap entry. This patch just implements "how to record information". No actual method for limiting the usage of swap. These routines use a flat table to record and lookup. A "wise" lookup system like radix-tree requires memory allocation at new records but swap-out is usually called under memory shortage (or memcg hits limit.) So, I used static allocation. (maybe dynamic allocation is not very hard but it adds additional memory allocation in memory shortage path.) Note1: In this, we use a pointer to record information and this means 8bytes per swap entry. I think we can reduce this when we create "id of cgroup" in the range of 0-65535 or 0-255. Reported-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Tested-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Reported-by: Hugh Dickins <hugh@veritas.com> Reported-by: Balbir Singh <balbir@linux.vnet.ibm.com> Reported-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Pavel Emelianov <xemul@openvz.org> Cc: Li Zefan <lizf@cn.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/page_cgroup.c197
-rw-r--r--mm/swapfile.c10
2 files changed, 207 insertions, 0 deletions
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index df1e54a5ed19..685e7c8e1fd6 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -8,6 +8,7 @@
8#include <linux/memory.h> 8#include <linux/memory.h>
9#include <linux/vmalloc.h> 9#include <linux/vmalloc.h>
10#include <linux/cgroup.h> 10#include <linux/cgroup.h>
11#include <linux/swapops.h>
11 12
12static void __meminit 13static void __meminit
13__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) 14__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
@@ -270,3 +271,199 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
270} 271}
271 272
272#endif 273#endif
274
275
276#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
277
278static DEFINE_MUTEX(swap_cgroup_mutex);
279struct swap_cgroup_ctrl {
280 struct page **map;
281 unsigned long length;
282};
283
284struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
285
286/*
287 * This 8bytes seems big..maybe we can reduce this when we can use "id" for
288 * cgroup rather than pointer.
289 */
290struct swap_cgroup {
291 struct mem_cgroup *val;
292};
293#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup))
294#define SC_POS_MASK (SC_PER_PAGE - 1)
295
296/*
297 * SwapCgroup implements "lookup" and "exchange" operations.
298 * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
299 * against SwapCache. At swap_free(), this is accessed directly from swap.
300 *
301 * This means,
302 * - we have no race in "exchange" when we're accessed via SwapCache because
303 * SwapCache(and its swp_entry) is under lock.
304 * - When called via swap_free(), there is no user of this entry and no race.
305 * Then, we don't need lock around "exchange".
306 *
307 * TODO: we can push these buffers out to HIGHMEM.
308 */
309
310/*
311 * allocate buffer for swap_cgroup.
312 */
313static int swap_cgroup_prepare(int type)
314{
315 struct page *page;
316 struct swap_cgroup_ctrl *ctrl;
317 unsigned long idx, max;
318
319 if (!do_swap_account)
320 return 0;
321 ctrl = &swap_cgroup_ctrl[type];
322
323 for (idx = 0; idx < ctrl->length; idx++) {
324 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
325 if (!page)
326 goto not_enough_page;
327 ctrl->map[idx] = page;
328 }
329 return 0;
330not_enough_page:
331 max = idx;
332 for (idx = 0; idx < max; idx++)
333 __free_page(ctrl->map[idx]);
334
335 return -ENOMEM;
336}
337
338/**
339 * swap_cgroup_record - record mem_cgroup for this swp_entry.
340 * @ent: swap entry to be recorded into
341 * @mem: mem_cgroup to be recorded
342 *
343 * Returns old value at success, NULL at failure.
344 * (Of course, old value can be NULL.)
345 */
346struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
347{
348 int type = swp_type(ent);
349 unsigned long offset = swp_offset(ent);
350 unsigned long idx = offset / SC_PER_PAGE;
351 unsigned long pos = offset & SC_POS_MASK;
352 struct swap_cgroup_ctrl *ctrl;
353 struct page *mappage;
354 struct swap_cgroup *sc;
355 struct mem_cgroup *old;
356
357 if (!do_swap_account)
358 return NULL;
359
360 ctrl = &swap_cgroup_ctrl[type];
361
362 mappage = ctrl->map[idx];
363 sc = page_address(mappage);
364 sc += pos;
365 old = sc->val;
366 sc->val = mem;
367
368 return old;
369}
370
371/**
372 * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
373 * @ent: swap entry to be looked up.
374 *
375 * Returns pointer to mem_cgroup at success. NULL at failure.
376 */
377struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
378{
379 int type = swp_type(ent);
380 unsigned long offset = swp_offset(ent);
381 unsigned long idx = offset / SC_PER_PAGE;
382 unsigned long pos = offset & SC_POS_MASK;
383 struct swap_cgroup_ctrl *ctrl;
384 struct page *mappage;
385 struct swap_cgroup *sc;
386 struct mem_cgroup *ret;
387
388 if (!do_swap_account)
389 return NULL;
390
391 ctrl = &swap_cgroup_ctrl[type];
392 mappage = ctrl->map[idx];
393 sc = page_address(mappage);
394 sc += pos;
395 ret = sc->val;
396 return ret;
397}
398
399int swap_cgroup_swapon(int type, unsigned long max_pages)
400{
401 void *array;
402 unsigned long array_size;
403 unsigned long length;
404 struct swap_cgroup_ctrl *ctrl;
405
406 if (!do_swap_account)
407 return 0;
408
409 length = ((max_pages/SC_PER_PAGE) + 1);
410 array_size = length * sizeof(void *);
411
412 array = vmalloc(array_size);
413 if (!array)
414 goto nomem;
415
416 memset(array, 0, array_size);
417 ctrl = &swap_cgroup_ctrl[type];
418 mutex_lock(&swap_cgroup_mutex);
419 ctrl->length = length;
420 ctrl->map = array;
421 if (swap_cgroup_prepare(type)) {
422 /* memory shortage */
423 ctrl->map = NULL;
424 ctrl->length = 0;
425 vfree(array);
426 mutex_unlock(&swap_cgroup_mutex);
427 goto nomem;
428 }
429 mutex_unlock(&swap_cgroup_mutex);
430
431 printk(KERN_INFO
432 "swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
433 " and %ld bytes to hold mem_cgroup pointers on swap\n",
434 array_size, length * PAGE_SIZE);
435 printk(KERN_INFO
436 "swap_cgroup can be disabled by noswapaccount boot option.\n");
437
438 return 0;
439nomem:
440 printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n");
441 printk(KERN_INFO
442 "swap_cgroup can be disabled by noswapaccount boot option\n");
443 return -ENOMEM;
444}
445
446void swap_cgroup_swapoff(int type)
447{
448 int i;
449 struct swap_cgroup_ctrl *ctrl;
450
451 if (!do_swap_account)
452 return;
453
454 mutex_lock(&swap_cgroup_mutex);
455 ctrl = &swap_cgroup_ctrl[type];
456 if (ctrl->map) {
457 for (i = 0; i < ctrl->length; i++) {
458 struct page *page = ctrl->map[i];
459 if (page)
460 __free_page(page);
461 }
462 vfree(ctrl->map);
463 ctrl->map = NULL;
464 ctrl->length = 0;
465 }
466 mutex_unlock(&swap_cgroup_mutex);
467}
468
469#endif
diff --git a/mm/swapfile.c b/mm/swapfile.c
index ddc6d92be2cb..1e7a715a3866 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -33,6 +33,7 @@
33#include <asm/pgtable.h> 33#include <asm/pgtable.h>
34#include <asm/tlbflush.h> 34#include <asm/tlbflush.h>
35#include <linux/swapops.h> 35#include <linux/swapops.h>
36#include <linux/page_cgroup.h>
36 37
37static DEFINE_SPINLOCK(swap_lock); 38static DEFINE_SPINLOCK(swap_lock);
38static unsigned int nr_swapfiles; 39static unsigned int nr_swapfiles;
@@ -1494,6 +1495,9 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1494 spin_unlock(&swap_lock); 1495 spin_unlock(&swap_lock);
1495 mutex_unlock(&swapon_mutex); 1496 mutex_unlock(&swapon_mutex);
1496 vfree(swap_map); 1497 vfree(swap_map);
1498 /* Destroy swap account information */
1499 swap_cgroup_swapoff(type);
1500
1497 inode = mapping->host; 1501 inode = mapping->host;
1498 if (S_ISBLK(inode->i_mode)) { 1502 if (S_ISBLK(inode->i_mode)) {
1499 struct block_device *bdev = I_BDEV(inode); 1503 struct block_device *bdev = I_BDEV(inode);
@@ -1811,6 +1815,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1811 } 1815 }
1812 swap_map[page_nr] = SWAP_MAP_BAD; 1816 swap_map[page_nr] = SWAP_MAP_BAD;
1813 } 1817 }
1818
1819 error = swap_cgroup_swapon(type, maxpages);
1820 if (error)
1821 goto bad_swap;
1822
1814 nr_good_pages = swap_header->info.last_page - 1823 nr_good_pages = swap_header->info.last_page -
1815 swap_header->info.nr_badpages - 1824 swap_header->info.nr_badpages -
1816 1 /* header page */; 1825 1 /* header page */;
@@ -1882,6 +1891,7 @@ bad_swap:
1882 bd_release(bdev); 1891 bd_release(bdev);
1883 } 1892 }
1884 destroy_swap_extents(p); 1893 destroy_swap_extents(p);
1894 swap_cgroup_swapoff(type);
1885bad_swap_2: 1895bad_swap_2:
1886 spin_lock(&swap_lock); 1896 spin_lock(&swap_lock);
1887 p->swap_file = NULL; 1897 p->swap_file = NULL;