diff options
Diffstat (limited to 'mm/page_cgroup.c')
-rw-r--r-- | mm/page_cgroup.c | 197 |
1 files changed, 197 insertions, 0 deletions
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index df1e54a5ed19..685e7c8e1fd6 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/memory.h> | 8 | #include <linux/memory.h> |
9 | #include <linux/vmalloc.h> | 9 | #include <linux/vmalloc.h> |
10 | #include <linux/cgroup.h> | 10 | #include <linux/cgroup.h> |
11 | #include <linux/swapops.h> | ||
11 | 12 | ||
12 | static void __meminit | 13 | static void __meminit |
13 | __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) | 14 | __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) |
@@ -270,3 +271,199 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) | |||
270 | } | 271 | } |
271 | 272 | ||
272 | #endif | 273 | #endif |
274 | |||
275 | |||
276 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | ||
277 | |||
/* Taken by swap_cgroup_swapon() and swap_cgroup_swapoff() while they install or tear down a swap_cgroup_ctrl[] entry. */
278 | static DEFINE_MUTEX(swap_cgroup_mutex); | |
/*
 * Per-swap-type bookkeeping for the pages that store swap_cgroup records.
 *  map:    array of page pointers; the page for a swap offset is
 *          map[offset / SC_PER_PAGE]. NULL while no storage is set up.
 *  length: number of entries in map.
 */
279 | struct swap_cgroup_ctrl { | |
280 | struct page **map; | |
281 | unsigned long length; | |
282 | }; | |
283 | ||
/* One control entry per swap type, indexed by swp_type() of the entry. */
284 | struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; | |
285 | |||
286 | /* | ||
287 | * These 8 bytes per entry seem big; maybe we can reduce this once we can use | |
288 | * an "id" for the cgroup rather than a pointer. | |
289 | */ | ||
290 | struct swap_cgroup { | |
/* mem_cgroup recorded for one swap entry; backing pages are allocated zeroed, so an unrecorded slot reads as NULL. */
291 | struct mem_cgroup *val; | |
292 | }; | |
/* Number of swap_cgroup records that fit in one page. */
293 | #define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) | |
/* Mask giving a swap offset's position inside its page. NOTE(review): only correct if SC_PER_PAGE is a power of two -- confirm. */
294 | #define SC_POS_MASK (SC_PER_PAGE - 1) | |
295 | |||
296 | /* | ||
297 | * SwapCgroup implements "lookup" and "exchange" operations. | ||
298 | * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge | ||
299 | * against SwapCache. At swap_free(), this is accessed directly from swap. | ||
300 | * | ||
301 | * This means, | ||
302 | * - we have no race in "exchange" when we're accessed via SwapCache because | ||
303 | * SwapCache(and its swp_entry) is under lock. | ||
304 | * - When called via swap_free(), there is no user of this entry and no race. | ||
305 | * Then, we don't need lock around "exchange". | ||
306 | * | ||
307 | * TODO: we can push these buffers out to HIGHMEM. | ||
308 | */ | ||
309 | |||
310 | /* | ||
311 | * allocate buffer for swap_cgroup. | ||
312 | */ | ||
313 | static int swap_cgroup_prepare(int type) | ||
314 | { | ||
315 | struct page *page; | ||
316 | struct swap_cgroup_ctrl *ctrl; | ||
317 | unsigned long idx, max; | ||
318 | |||
319 | if (!do_swap_account) | ||
320 | return 0; | ||
321 | ctrl = &swap_cgroup_ctrl[type]; | ||
322 | |||
323 | for (idx = 0; idx < ctrl->length; idx++) { | ||
324 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
325 | if (!page) | ||
326 | goto not_enough_page; | ||
327 | ctrl->map[idx] = page; | ||
328 | } | ||
329 | return 0; | ||
330 | not_enough_page: | ||
331 | max = idx; | ||
332 | for (idx = 0; idx < max; idx++) | ||
333 | __free_page(ctrl->map[idx]); | ||
334 | |||
335 | return -ENOMEM; | ||
336 | } | ||
337 | |||
338 | /** | ||
339 | * swap_cgroup_record - record mem_cgroup for this swp_entry. | ||
340 | * @ent: swap entry to be recorded into | ||
341 | * @mem: mem_cgroup to be recorded | ||
342 | * | ||
343 | * Returns old value at success, NULL at failure. | ||
344 | * (Of course, old value can be NULL.) | ||
345 | */ | ||
346 | struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) | ||
347 | { | ||
348 | int type = swp_type(ent); | ||
349 | unsigned long offset = swp_offset(ent); | ||
350 | unsigned long idx = offset / SC_PER_PAGE; | ||
351 | unsigned long pos = offset & SC_POS_MASK; | ||
352 | struct swap_cgroup_ctrl *ctrl; | ||
353 | struct page *mappage; | ||
354 | struct swap_cgroup *sc; | ||
355 | struct mem_cgroup *old; | ||
356 | |||
357 | if (!do_swap_account) | ||
358 | return NULL; | ||
359 | |||
360 | ctrl = &swap_cgroup_ctrl[type]; | ||
361 | |||
362 | mappage = ctrl->map[idx]; | ||
363 | sc = page_address(mappage); | ||
364 | sc += pos; | ||
365 | old = sc->val; | ||
366 | sc->val = mem; | ||
367 | |||
368 | return old; | ||
369 | } | ||
370 | |||
371 | /** | ||
372 | * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry | ||
373 | * @ent: swap entry to be looked up. | ||
374 | * | ||
375 | * Returns pointer to mem_cgroup at success. NULL at failure. | ||
376 | */ | ||
377 | struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) | ||
378 | { | ||
379 | int type = swp_type(ent); | ||
380 | unsigned long offset = swp_offset(ent); | ||
381 | unsigned long idx = offset / SC_PER_PAGE; | ||
382 | unsigned long pos = offset & SC_POS_MASK; | ||
383 | struct swap_cgroup_ctrl *ctrl; | ||
384 | struct page *mappage; | ||
385 | struct swap_cgroup *sc; | ||
386 | struct mem_cgroup *ret; | ||
387 | |||
388 | if (!do_swap_account) | ||
389 | return NULL; | ||
390 | |||
391 | ctrl = &swap_cgroup_ctrl[type]; | ||
392 | mappage = ctrl->map[idx]; | ||
393 | sc = page_address(mappage); | ||
394 | sc += pos; | ||
395 | ret = sc->val; | ||
396 | return ret; | ||
397 | } | ||
398 | |||
399 | int swap_cgroup_swapon(int type, unsigned long max_pages) | ||
400 | { | ||
401 | void *array; | ||
402 | unsigned long array_size; | ||
403 | unsigned long length; | ||
404 | struct swap_cgroup_ctrl *ctrl; | ||
405 | |||
406 | if (!do_swap_account) | ||
407 | return 0; | ||
408 | |||
409 | length = ((max_pages/SC_PER_PAGE) + 1); | ||
410 | array_size = length * sizeof(void *); | ||
411 | |||
412 | array = vmalloc(array_size); | ||
413 | if (!array) | ||
414 | goto nomem; | ||
415 | |||
416 | memset(array, 0, array_size); | ||
417 | ctrl = &swap_cgroup_ctrl[type]; | ||
418 | mutex_lock(&swap_cgroup_mutex); | ||
419 | ctrl->length = length; | ||
420 | ctrl->map = array; | ||
421 | if (swap_cgroup_prepare(type)) { | ||
422 | /* memory shortage */ | ||
423 | ctrl->map = NULL; | ||
424 | ctrl->length = 0; | ||
425 | vfree(array); | ||
426 | mutex_unlock(&swap_cgroup_mutex); | ||
427 | goto nomem; | ||
428 | } | ||
429 | mutex_unlock(&swap_cgroup_mutex); | ||
430 | |||
431 | printk(KERN_INFO | ||
432 | "swap_cgroup: uses %ld bytes of vmalloc for pointer array space" | ||
433 | " and %ld bytes to hold mem_cgroup pointers on swap\n", | ||
434 | array_size, length * PAGE_SIZE); | ||
435 | printk(KERN_INFO | ||
436 | "swap_cgroup can be disabled by noswapaccount boot option.\n"); | ||
437 | |||
438 | return 0; | ||
439 | nomem: | ||
440 | printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n"); | ||
441 | printk(KERN_INFO | ||
442 | "swap_cgroup can be disabled by noswapaccount boot option\n"); | ||
443 | return -ENOMEM; | ||
444 | } | ||
445 | |||
446 | void swap_cgroup_swapoff(int type) | ||
447 | { | ||
448 | int i; | ||
449 | struct swap_cgroup_ctrl *ctrl; | ||
450 | |||
451 | if (!do_swap_account) | ||
452 | return; | ||
453 | |||
454 | mutex_lock(&swap_cgroup_mutex); | ||
455 | ctrl = &swap_cgroup_ctrl[type]; | ||
456 | if (ctrl->map) { | ||
457 | for (i = 0; i < ctrl->length; i++) { | ||
458 | struct page *page = ctrl->map[i]; | ||
459 | if (page) | ||
460 | __free_page(page); | ||
461 | } | ||
462 | vfree(ctrl->map); | ||
463 | ctrl->map = NULL; | ||
464 | ctrl->length = 0; | ||
465 | } | ||
466 | mutex_unlock(&swap_cgroup_mutex); | ||
467 | } | ||
468 | |||
469 | #endif | ||