Diffstat (limited to 'mm/page_cgroup.c')
-rw-r--r--	mm/page_cgroup.c	209
1 file changed, 202 insertions(+), 7 deletions(-)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index ab27ff750519..7006a11350c8 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -8,6 +8,7 @@
 #include <linux/memory.h>
 #include <linux/vmalloc.h>
 #include <linux/cgroup.h>
+#include <linux/swapops.h>
 
 static void __meminit
 __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
@@ -15,6 +16,7 @@ __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
 	pc->flags = 0;
 	pc->mem_cgroup = NULL;
 	pc->page = pfn_to_page(pfn);
+	INIT_LIST_HEAD(&pc->lru);
 }
 static unsigned long total_usage;
 
@@ -72,7 +74,7 @@ void __init page_cgroup_init(void)
 
 	int nid, fail;
 
-	if (mem_cgroup_subsys.disabled)
+	if (mem_cgroup_disabled())
 		return;
 
 	for_each_online_node(nid) {
@@ -101,15 +103,13 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 }
 
 /* __alloc_bootmem...() is protected by !slab_available() */
-int __init_refok init_section_page_cgroup(unsigned long pfn)
+static int __init_refok init_section_page_cgroup(unsigned long pfn)
 {
-	struct mem_section *section;
+	struct mem_section *section = __pfn_to_section(pfn);
 	struct page_cgroup *base, *pc;
 	unsigned long table_size;
 	int nid, index;
 
-	section = __pfn_to_section(pfn);
-
 	if (!section->page_cgroup) {
 		nid = page_to_nid(pfn_to_page(pfn));
 		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
@@ -145,7 +145,6 @@ int __init_refok init_section_page_cgroup(unsigned long pfn)
 		__init_page_cgroup(pc, pfn + index);
 	}
 
-	section = __pfn_to_section(pfn);
 	section->page_cgroup = base - pfn;
 	total_usage += table_size;
 	return 0;
@@ -248,7 +247,7 @@ void __init page_cgroup_init(void)
 	unsigned long pfn;
 	int fail = 0;
 
-	if (mem_cgroup_subsys.disabled)
+	if (mem_cgroup_disabled())
 		return;
 
 	for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
@@ -273,3 +272,199 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
 }
 
 #endif
+
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+
+static DEFINE_MUTEX(swap_cgroup_mutex);
+struct swap_cgroup_ctrl {
+	struct page **map;
+	unsigned long length;
+};
+
+struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
+
+/*
+ * This 8bytes seems big..maybe we can reduce this when we can use "id" for
+ * cgroup rather than pointer.
+ */
+struct swap_cgroup {
+	struct mem_cgroup	*val;
+};
+#define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
+#define SC_POS_MASK	(SC_PER_PAGE - 1)
+
+/*
+ * SwapCgroup implements "lookup" and "exchange" operations.
+ * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
+ * against SwapCache. At swap_free(), this is accessed directly from swap.
+ *
+ * This means,
+ *   - we have no race in "exchange" when we're accessed via SwapCache because
+ *     SwapCache(and its swp_entry) is under lock.
+ *   - When called via swap_free(), there is no user of this entry and no race.
+ * Then, we don't need lock around "exchange".
+ *
+ * TODO: we can push these buffers out to HIGHMEM.
+ */
+
+/*
+ * allocate buffer for swap_cgroup.
+ */
+static int swap_cgroup_prepare(int type)
+{
+	struct page *page;
+	struct swap_cgroup_ctrl *ctrl;
+	unsigned long idx, max;
+
+	if (!do_swap_account)
+		return 0;
+	ctrl = &swap_cgroup_ctrl[type];
+
+	for (idx = 0; idx < ctrl->length; idx++) {
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!page)
+			goto not_enough_page;
+		ctrl->map[idx] = page;
+	}
+	return 0;
+not_enough_page:
+	max = idx;
+	for (idx = 0; idx < max; idx++)
+		__free_page(ctrl->map[idx]);
+
+	return -ENOMEM;
+}
+
+/**
+ * swap_cgroup_record - record mem_cgroup for this swp_entry.
+ * @ent: swap entry to be recorded into
+ * @mem: mem_cgroup to be recorded
+ *
+ * Returns old value at success, NULL at failure.
+ * (Of course, old value can be NULL.)
+ */
+struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+{
+	int type = swp_type(ent);
+	unsigned long offset = swp_offset(ent);
+	unsigned long idx = offset / SC_PER_PAGE;
+	unsigned long pos = offset & SC_POS_MASK;
+	struct swap_cgroup_ctrl *ctrl;
+	struct page *mappage;
+	struct swap_cgroup *sc;
+	struct mem_cgroup *old;
+
+	if (!do_swap_account)
+		return NULL;
+
+	ctrl = &swap_cgroup_ctrl[type];
+
+	mappage = ctrl->map[idx];
+	sc = page_address(mappage);
+	sc += pos;
+	old = sc->val;
+	sc->val = mem;
+
+	return old;
+}
+
+/**
+ * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
+ * @ent: swap entry to be looked up.
+ *
+ * Returns pointer to mem_cgroup at success. NULL at failure.
+ */
+struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+{
+	int type = swp_type(ent);
+	unsigned long offset = swp_offset(ent);
+	unsigned long idx = offset / SC_PER_PAGE;
+	unsigned long pos = offset & SC_POS_MASK;
+	struct swap_cgroup_ctrl *ctrl;
+	struct page *mappage;
+	struct swap_cgroup *sc;
+	struct mem_cgroup *ret;
+
+	if (!do_swap_account)
+		return NULL;
+
+	ctrl = &swap_cgroup_ctrl[type];
+	mappage = ctrl->map[idx];
+	sc = page_address(mappage);
+	sc += pos;
+	ret = sc->val;
+	return ret;
+}
+
+int swap_cgroup_swapon(int type, unsigned long max_pages)
+{
+	void *array;
+	unsigned long array_size;
+	unsigned long length;
+	struct swap_cgroup_ctrl *ctrl;
+
+	if (!do_swap_account)
+		return 0;
+
+	length = ((max_pages/SC_PER_PAGE) + 1);
+	array_size = length * sizeof(void *);
+
+	array = vmalloc(array_size);
+	if (!array)
+		goto nomem;
+
+	memset(array, 0, array_size);
+	ctrl = &swap_cgroup_ctrl[type];
+	mutex_lock(&swap_cgroup_mutex);
+	ctrl->length = length;
+	ctrl->map = array;
+	if (swap_cgroup_prepare(type)) {
+		/* memory shortage */
+		ctrl->map = NULL;
+		ctrl->length = 0;
+		vfree(array);
+		mutex_unlock(&swap_cgroup_mutex);
+		goto nomem;
+	}
+	mutex_unlock(&swap_cgroup_mutex);
+
+	printk(KERN_INFO
+		"swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
+		" and %ld bytes to hold mem_cgroup pointers on swap\n",
+		array_size, length * PAGE_SIZE);
+	printk(KERN_INFO
+		"swap_cgroup can be disabled by noswapaccount boot option.\n");
+
+	return 0;
+nomem:
+	printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n");
+	printk(KERN_INFO
+		"swap_cgroup can be disabled by noswapaccount boot option\n");
+	return -ENOMEM;
+}
+
+void swap_cgroup_swapoff(int type)
+{
+	int i;
+	struct swap_cgroup_ctrl *ctrl;
+
+	if (!do_swap_account)
+		return;
+
+	mutex_lock(&swap_cgroup_mutex);
+	ctrl = &swap_cgroup_ctrl[type];
+	if (ctrl->map) {
+		for (i = 0; i < ctrl->length; i++) {
+			struct page *page = ctrl->map[i];
+			if (page)
+				__free_page(page);
+		}
+		vfree(ctrl->map);
+		ctrl->map = NULL;
+		ctrl->length = 0;
+	}
+	mutex_unlock(&swap_cgroup_mutex);
+}
+
+#endif
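
For reference, a minimal sketch of how the swap_cgroup table added by this patch is meant to be driven from the memcg side. Only swap_cgroup_record(), lookup_swap_cgroup(), swap_cgroup_swapon() and swap_cgroup_swapoff() come from the diff above; the wrapper names below are hypothetical illustrations, not part of this patch, and assume the caller already serializes access to the swap entry (e.g. via the locked SwapCache), which is why no extra lock is taken around the exchange.

/*
 * Illustrative only -- not part of this patch. swap_cgroup_swapon()/
 * swap_cgroup_swapoff() are expected to be called from the swapon/swapoff
 * paths; the helpers below show the intended record/lookup/clear pattern.
 */
static void record_owner_at_swapout(swp_entry_t ent, struct mem_cgroup *memcg)
{
	/* Remember which mem_cgroup owned the page that went out to swap. */
	struct mem_cgroup *old = swap_cgroup_record(ent, memcg);

	/* A non-NULL old value would mean the slot was already in use. */
	WARN_ON(old != NULL);
}

static struct mem_cgroup *peek_owner_at_swapin(swp_entry_t ent)
{
	/* Look up the recorded owner without clearing the slot. */
	return lookup_swap_cgroup(ent);
}

static struct mem_cgroup *clear_owner_at_swap_free(swp_entry_t ent)
{
	/* Clear the slot and return the previous owner for uncharging. */
	return swap_cgroup_record(ent, NULL);
}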