diff options
-rw-r--r-- | include/linux/page_cgroup.h | 35 | ||||
-rw-r--r-- | mm/page_cgroup.c | 197 | ||||
-rw-r--r-- | mm/swapfile.c | 10 |
3 files changed, 242 insertions, 0 deletions
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 1e6d34bfa094..d754b2dfbf2d 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h | |||
@@ -105,4 +105,39 @@ static inline void page_cgroup_init(void) | |||
105 | } | 105 | } |
106 | 106 | ||
107 | #endif | 107 | #endif |
108 | |||
109 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | ||
110 | #include <linux/swap.h> | ||
111 | extern struct mem_cgroup * | ||
112 | swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem); | ||
113 | extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent); | ||
114 | extern int swap_cgroup_swapon(int type, unsigned long max_pages); | ||
115 | extern void swap_cgroup_swapoff(int type); | ||
116 | #else | ||
117 | #include <linux/swap.h> | ||
118 | |||
119 | static inline | ||
120 | struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) | ||
121 | { | ||
122 | return NULL; | ||
123 | } | ||
124 | |||
125 | static inline | ||
126 | struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) | ||
127 | { | ||
128 | return NULL; | ||
129 | } | ||
130 | |||
131 | static inline int | ||
132 | swap_cgroup_swapon(int type, unsigned long max_pages) | ||
133 | { | ||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | static inline void swap_cgroup_swapoff(int type) | ||
138 | { | ||
139 | return; | ||
140 | } | ||
141 | |||
142 | #endif | ||
108 | #endif | 143 | #endif |
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index df1e54a5ed19..685e7c8e1fd6 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/memory.h> | 8 | #include <linux/memory.h> |
9 | #include <linux/vmalloc.h> | 9 | #include <linux/vmalloc.h> |
10 | #include <linux/cgroup.h> | 10 | #include <linux/cgroup.h> |
11 | #include <linux/swapops.h> | ||
11 | 12 | ||
12 | static void __meminit | 13 | static void __meminit |
13 | __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) | 14 | __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) |
@@ -270,3 +271,199 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) | |||
270 | } | 271 | } |
271 | 272 | ||
272 | #endif | 273 | #endif |
274 | |||
275 | |||
276 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | ||
277 | |||
278 | static DEFINE_MUTEX(swap_cgroup_mutex); | ||
279 | struct swap_cgroup_ctrl { | ||
280 | struct page **map; | ||
281 | unsigned long length; | ||
282 | }; | ||
283 | |||
284 | struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; | ||
285 | |||
/*
 * Record kept for each swap entry. It currently stores a full mem_cgroup
 * pointer, which is rather large; this could shrink once a cgroup can be
 * referred to by an "id" instead of a pointer.
 */
struct swap_cgroup {
	struct mem_cgroup *val;
};

/* Records per page, and the mask that extracts a record's slot in its page. */
#define SC_PER_PAGE	(PAGE_SIZE / sizeof(struct swap_cgroup))
#define SC_POS_MASK	(SC_PER_PAGE - 1)
295 | |||
296 | /* | ||
297 | * SwapCgroup implements "lookup" and "exchange" operations. | ||
298 | * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge | ||
299 | * against SwapCache. At swap_free(), this is accessed directly from swap. | ||
300 | * | ||
301 | * This means, | ||
302 | * - we have no race in "exchange" when we're accessed via SwapCache because | ||
303 | * SwapCache(and its swp_entry) is under lock. | ||
304 | * - When called via swap_free(), there is no user of this entry and no race. | ||
305 | * Then, we don't need lock around "exchange". | ||
306 | * | ||
307 | * TODO: we can push these buffers out to HIGHMEM. | ||
308 | */ | ||
309 | |||
310 | /* | ||
311 | * allocate buffer for swap_cgroup. | ||
312 | */ | ||
313 | static int swap_cgroup_prepare(int type) | ||
314 | { | ||
315 | struct page *page; | ||
316 | struct swap_cgroup_ctrl *ctrl; | ||
317 | unsigned long idx, max; | ||
318 | |||
319 | if (!do_swap_account) | ||
320 | return 0; | ||
321 | ctrl = &swap_cgroup_ctrl[type]; | ||
322 | |||
323 | for (idx = 0; idx < ctrl->length; idx++) { | ||
324 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
325 | if (!page) | ||
326 | goto not_enough_page; | ||
327 | ctrl->map[idx] = page; | ||
328 | } | ||
329 | return 0; | ||
330 | not_enough_page: | ||
331 | max = idx; | ||
332 | for (idx = 0; idx < max; idx++) | ||
333 | __free_page(ctrl->map[idx]); | ||
334 | |||
335 | return -ENOMEM; | ||
336 | } | ||
337 | |||
338 | /** | ||
339 | * swap_cgroup_record - record mem_cgroup for this swp_entry. | ||
340 | * @ent: swap entry to be recorded into | ||
341 | * @mem: mem_cgroup to be recorded | ||
342 | * | ||
343 | * Returns old value at success, NULL at failure. | ||
344 | * (Of course, old value can be NULL.) | ||
345 | */ | ||
346 | struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) | ||
347 | { | ||
348 | int type = swp_type(ent); | ||
349 | unsigned long offset = swp_offset(ent); | ||
350 | unsigned long idx = offset / SC_PER_PAGE; | ||
351 | unsigned long pos = offset & SC_POS_MASK; | ||
352 | struct swap_cgroup_ctrl *ctrl; | ||
353 | struct page *mappage; | ||
354 | struct swap_cgroup *sc; | ||
355 | struct mem_cgroup *old; | ||
356 | |||
357 | if (!do_swap_account) | ||
358 | return NULL; | ||
359 | |||
360 | ctrl = &swap_cgroup_ctrl[type]; | ||
361 | |||
362 | mappage = ctrl->map[idx]; | ||
363 | sc = page_address(mappage); | ||
364 | sc += pos; | ||
365 | old = sc->val; | ||
366 | sc->val = mem; | ||
367 | |||
368 | return old; | ||
369 | } | ||
370 | |||
371 | /** | ||
372 | * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry | ||
373 | * @ent: swap entry to be looked up. | ||
374 | * | ||
375 | * Returns pointer to mem_cgroup at success. NULL at failure. | ||
376 | */ | ||
377 | struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) | ||
378 | { | ||
379 | int type = swp_type(ent); | ||
380 | unsigned long offset = swp_offset(ent); | ||
381 | unsigned long idx = offset / SC_PER_PAGE; | ||
382 | unsigned long pos = offset & SC_POS_MASK; | ||
383 | struct swap_cgroup_ctrl *ctrl; | ||
384 | struct page *mappage; | ||
385 | struct swap_cgroup *sc; | ||
386 | struct mem_cgroup *ret; | ||
387 | |||
388 | if (!do_swap_account) | ||
389 | return NULL; | ||
390 | |||
391 | ctrl = &swap_cgroup_ctrl[type]; | ||
392 | mappage = ctrl->map[idx]; | ||
393 | sc = page_address(mappage); | ||
394 | sc += pos; | ||
395 | ret = sc->val; | ||
396 | return ret; | ||
397 | } | ||
398 | |||
399 | int swap_cgroup_swapon(int type, unsigned long max_pages) | ||
400 | { | ||
401 | void *array; | ||
402 | unsigned long array_size; | ||
403 | unsigned long length; | ||
404 | struct swap_cgroup_ctrl *ctrl; | ||
405 | |||
406 | if (!do_swap_account) | ||
407 | return 0; | ||
408 | |||
409 | length = ((max_pages/SC_PER_PAGE) + 1); | ||
410 | array_size = length * sizeof(void *); | ||
411 | |||
412 | array = vmalloc(array_size); | ||
413 | if (!array) | ||
414 | goto nomem; | ||
415 | |||
416 | memset(array, 0, array_size); | ||
417 | ctrl = &swap_cgroup_ctrl[type]; | ||
418 | mutex_lock(&swap_cgroup_mutex); | ||
419 | ctrl->length = length; | ||
420 | ctrl->map = array; | ||
421 | if (swap_cgroup_prepare(type)) { | ||
422 | /* memory shortage */ | ||
423 | ctrl->map = NULL; | ||
424 | ctrl->length = 0; | ||
425 | vfree(array); | ||
426 | mutex_unlock(&swap_cgroup_mutex); | ||
427 | goto nomem; | ||
428 | } | ||
429 | mutex_unlock(&swap_cgroup_mutex); | ||
430 | |||
431 | printk(KERN_INFO | ||
432 | "swap_cgroup: uses %ld bytes of vmalloc for pointer array space" | ||
433 | " and %ld bytes to hold mem_cgroup pointers on swap\n", | ||
434 | array_size, length * PAGE_SIZE); | ||
435 | printk(KERN_INFO | ||
436 | "swap_cgroup can be disabled by noswapaccount boot option.\n"); | ||
437 | |||
438 | return 0; | ||
439 | nomem: | ||
440 | printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n"); | ||
441 | printk(KERN_INFO | ||
442 | "swap_cgroup can be disabled by noswapaccount boot option\n"); | ||
443 | return -ENOMEM; | ||
444 | } | ||
445 | |||
446 | void swap_cgroup_swapoff(int type) | ||
447 | { | ||
448 | int i; | ||
449 | struct swap_cgroup_ctrl *ctrl; | ||
450 | |||
451 | if (!do_swap_account) | ||
452 | return; | ||
453 | |||
454 | mutex_lock(&swap_cgroup_mutex); | ||
455 | ctrl = &swap_cgroup_ctrl[type]; | ||
456 | if (ctrl->map) { | ||
457 | for (i = 0; i < ctrl->length; i++) { | ||
458 | struct page *page = ctrl->map[i]; | ||
459 | if (page) | ||
460 | __free_page(page); | ||
461 | } | ||
462 | vfree(ctrl->map); | ||
463 | ctrl->map = NULL; | ||
464 | ctrl->length = 0; | ||
465 | } | ||
466 | mutex_unlock(&swap_cgroup_mutex); | ||
467 | } | ||
468 | |||
469 | #endif | ||
diff --git a/mm/swapfile.c b/mm/swapfile.c index ddc6d92be2cb..1e7a715a3866 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
34 | #include <asm/tlbflush.h> | 34 | #include <asm/tlbflush.h> |
35 | #include <linux/swapops.h> | 35 | #include <linux/swapops.h> |
36 | #include <linux/page_cgroup.h> | ||
36 | 37 | ||
37 | static DEFINE_SPINLOCK(swap_lock); | 38 | static DEFINE_SPINLOCK(swap_lock); |
38 | static unsigned int nr_swapfiles; | 39 | static unsigned int nr_swapfiles; |
@@ -1494,6 +1495,9 @@ asmlinkage long sys_swapoff(const char __user * specialfile) | |||
1494 | spin_unlock(&swap_lock); | 1495 | spin_unlock(&swap_lock); |
1495 | mutex_unlock(&swapon_mutex); | 1496 | mutex_unlock(&swapon_mutex); |
1496 | vfree(swap_map); | 1497 | vfree(swap_map); |
1498 | /* Destroy swap account information */ | ||
1499 | swap_cgroup_swapoff(type); | ||
1500 | |||
1497 | inode = mapping->host; | 1501 | inode = mapping->host; |
1498 | if (S_ISBLK(inode->i_mode)) { | 1502 | if (S_ISBLK(inode->i_mode)) { |
1499 | struct block_device *bdev = I_BDEV(inode); | 1503 | struct block_device *bdev = I_BDEV(inode); |
@@ -1811,6 +1815,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1811 | } | 1815 | } |
1812 | swap_map[page_nr] = SWAP_MAP_BAD; | 1816 | swap_map[page_nr] = SWAP_MAP_BAD; |
1813 | } | 1817 | } |
1818 | |||
1819 | error = swap_cgroup_swapon(type, maxpages); | ||
1820 | if (error) | ||
1821 | goto bad_swap; | ||
1822 | |||
1814 | nr_good_pages = swap_header->info.last_page - | 1823 | nr_good_pages = swap_header->info.last_page - |
1815 | swap_header->info.nr_badpages - | 1824 | swap_header->info.nr_badpages - |
1816 | 1 /* header page */; | 1825 | 1 /* header page */; |
@@ -1882,6 +1891,7 @@ bad_swap: | |||
1882 | bd_release(bdev); | 1891 | bd_release(bdev); |
1883 | } | 1892 | } |
1884 | destroy_swap_extents(p); | 1893 | destroy_swap_extents(p); |
1894 | swap_cgroup_swapoff(type); | ||
1885 | bad_swap_2: | 1895 | bad_swap_2: |
1886 | spin_lock(&swap_lock); | 1896 | spin_lock(&swap_lock); |
1887 | p->swap_file = NULL; | 1897 | p->swap_file = NULL; |