 include/linux/memcontrol.h | 110
 mm/memcontrol.c            | 170
 2 files changed, 280 insertions(+), 0 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e98a74c0c9c0..afa2ad40457e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -21,6 +21,7 @@
 #define _LINUX_MEMCONTROL_H
 #include <linux/cgroup.h>
 #include <linux/vm_event_item.h>
+#include <linux/hardirq.h>
 
 struct mem_cgroup;
 struct page_cgroup;
@@ -414,5 +415,114 @@ static inline void sock_release_memcg(struct sock *sk)
 {
 }
 #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_kmem_enabled(void)
+{
+	return true;
+}
+
+/*
+ * In general, we'll do everything in our power to not incur any overhead
+ * for non-memcg users of the kmem functions. Not even a function call, if we
+ * can avoid it.
+ *
+ * Therefore, we'll inline all those functions so that in the best case, we'll
+ * see that kmemcg is off for everybody and proceed quickly. If it is on,
+ * we'll still do most of the flag checking inline. We check a lot of
+ * conditions, but because they are pretty simple, they are expected to be
+ * fast.
+ */
+bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
+					int order);
+void __memcg_kmem_commit_charge(struct page *page,
+				struct mem_cgroup *memcg, int order);
+void __memcg_kmem_uncharge_pages(struct page *page, int order);
+
+/**
+ * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
+ * @gfp: the gfp allocation flags.
+ * @memcg: a pointer to the memcg this was charged against.
+ * @order: allocation order.
+ *
+ * Returns true if the memcg to which the current task belongs can hold this
+ * allocation.
+ *
+ * We return true automatically if this allocation is not to be accounted to
+ * any memcg.
+ */
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	if (!memcg_kmem_enabled())
+		return true;
+
+	/*
+	 * __GFP_NOFAIL allocations will move on even if charging is not
+	 * possible. Therefore we don't even try, and leave this allocation
+	 * unaccounted. We could in theory charge it with
+	 * res_counter_charge_nofail, but we hope those allocations are rare,
+	 * and won't be worth the trouble.
+	 */
+	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+		return true;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return true;
+
+	/* If the task is dying, just let it go. */
+	if (unlikely(fatal_signal_pending(current)))
+		return true;
+
+	return __memcg_kmem_newpage_charge(gfp, memcg, order);
+}
+
+/**
+ * memcg_kmem_uncharge_pages: uncharge pages from memcg
+ * @page: pointer to struct page being freed
+ * @order: allocation order.
+ *
+ * There is no need to specify the memcg here; it is embedded in page_cgroup.
+ */
+static inline void
+memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	if (memcg_kmem_enabled())
+		__memcg_kmem_uncharge_pages(page, order);
+}
+
+/**
+ * memcg_kmem_commit_charge: embeds correct memcg in a page
+ * @page: pointer to struct page recently allocated
+ * @memcg: the memcg structure we charged against
+ * @order: allocation order.
+ *
+ * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
+ * failure of the allocation. If @page is NULL, this function will revert the
+ * charges. Otherwise, it will commit the memcg given by @memcg to the
+ * corresponding page_cgroup.
+ */
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+	if (memcg_kmem_enabled() && memcg)
+		__memcg_kmem_commit_charge(page, memcg, order);
+}
+
+#else
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	return true;
+}
+
+static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+}
+
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
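Taken together, these wrappers define a three-step protocol around page allocation: charge before allocating, commit unconditionally afterwards, uncharge at free time. A minimal sketch of an allocation-side caller, assuming a hypothetical helper name (alloc_accounted_pages is not part of this patch; only the memcg_kmem_* calls are):

/*
 * Hypothetical illustration only: a sketch of how an allocation site
 * would use the API above. Accounting only happens when the gfp mask
 * carries __GFP_KMEMCG; otherwise memcg_kmem_newpage_charge() returns
 * true immediately and nothing is charged.
 */
static struct page *alloc_accounted_pages(gfp_t gfp, int order)
{
	struct mem_cgroup *memcg = NULL;
	struct page *page;

	if (!memcg_kmem_newpage_charge(gfp, &memcg, order))
		return NULL;	/* memcg over its kmem limit: fail */

	page = alloc_pages(gfp, order);
	/* Must run even on failure: a NULL page reverts the charge. */
	memcg_kmem_commit_charge(page, memcg, order);
	return page;
}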
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bba1cb4bbb82..b9afa060b8d6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -10,6 +10,10 @@
  * Copyright (C) 2009 Nokia Corporation
  * Author: Kirill A. Shutemov
  *
+ * Kernel Memory Controller
+ * Copyright (C) 2012 Parallels Inc. and Google Inc.
+ * Authors: Glauber Costa and Suleiman Souhlal
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -2661,6 +2665,172 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	memcg_check_events(memcg, page);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
+{
+	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
+		(memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
+}
+
+static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+{
+	struct res_counter *fail_res;
+	struct mem_cgroup *_memcg;
+	int ret = 0;
+	bool may_oom;
+
+	ret = res_counter_charge(&memcg->kmem, size, &fail_res);
+	if (ret)
+		return ret;
+
+	/*
+	 * Conditions under which we can wait for the oom_killer. Those are
+	 * the same conditions tested by the core page allocator.
+	 */
+	may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
+
+	_memcg = memcg;
+	ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
+				      &_memcg, may_oom);
+
+	if (ret == -EINTR) {
+		/*
+		 * __mem_cgroup_try_charge() chose to bypass to root due to
+		 * OOM kill or fatal signal. Since our only options are to
+		 * either fail the allocation or charge it to this cgroup, do
+		 * it as a temporary condition. But we can't fail. From a
+		 * kmem/slab perspective, the cache has already been selected
+		 * by mem_cgroup_kmem_get_cache(), so it is too late to change
+		 * our minds.
+		 *
+		 * This condition will only trigger if the task entered
+		 * memcg_charge_kmem in a sane state, but was OOM-killed during
+		 * __mem_cgroup_try_charge() above. Tasks that were already
+		 * dying when the allocation triggers should already have been
+		 * directed to the root cgroup in memcontrol.h.
+		 */
+		res_counter_charge_nofail(&memcg->res, size, &fail_res);
+		if (do_swap_account)
+			res_counter_charge_nofail(&memcg->memsw, size,
+						  &fail_res);
+		ret = 0;
+	} else if (ret)
+		res_counter_uncharge(&memcg->kmem, size);
+
+	return ret;
+}
+
+static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+{
+	res_counter_uncharge(&memcg->kmem, size);
+	res_counter_uncharge(&memcg->res, size);
+	if (do_swap_account)
+		res_counter_uncharge(&memcg->memsw, size);
+}
+
+/*
+ * We need to verify if the allocation against current->mm->owner's memcg is
+ * possible for the given order. But the page is not allocated yet, so we'll
+ * need a further commit step to do the final arrangements.
+ *
+ * It is possible for the task to switch cgroups in the meantime, so at
+ * commit time, we can't rely on task conversion any longer. We'll then use
+ * the handle argument to return to the caller which cgroup we should commit
+ * against. We could also return the memcg directly and avoid the pointer
+ * passing, but a boolean return value gives better semantics considering
+ * the compiled-out case as well.
+ *
+ * Returning true means the allocation is possible.
+ */
+bool
+__memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	*_memcg = NULL;
+	memcg = try_get_mem_cgroup_from_mm(current->mm);
+
+	/*
+	 * Very rare case described in mem_cgroup_from_task. Unfortunately
+	 * there isn't much we can do without complicating this too much,
+	 * and it would be gfp-dependent anyway. Just let it go.
+	 */
+	if (unlikely(!memcg))
+		return true;
+
+	if (!memcg_can_account_kmem(memcg)) {
+		css_put(&memcg->css);
+		return true;
+	}
+
+	mem_cgroup_get(memcg);
+
+	ret = memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order);
+	if (!ret)
+		*_memcg = memcg;
+	else
+		mem_cgroup_put(memcg);
+
+	css_put(&memcg->css);
+	return (ret == 0);
+}
+
+void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
+				int order)
+{
+	struct page_cgroup *pc;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+
+	/* The page allocation failed. Revert. */
+	if (!page) {
+		memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+		mem_cgroup_put(memcg);
+		return;
+	}
+
+	pc = lookup_page_cgroup(page);
+	lock_page_cgroup(pc);
+	pc->mem_cgroup = memcg;
+	SetPageCgroupUsed(pc);
+	unlock_page_cgroup(pc);
+}
+
+void __memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	struct mem_cgroup *memcg = NULL;
+	struct page_cgroup *pc;
+
+
+	pc = lookup_page_cgroup(page);
+	/*
+	 * Fast unlocked return. The flag could theoretically have changed
+	 * since, so we check again after taking the lock.
+	 */
+	if (!PageCgroupUsed(pc))
+		return;
+
+	lock_page_cgroup(pc);
+	if (PageCgroupUsed(pc)) {
+		memcg = pc->mem_cgroup;
+		ClearPageCgroupUsed(pc);
+	}
+	unlock_page_cgroup(pc);
+
+	/*
+	 * We trust that a page has a memcg associated with it only if it is
+	 * a valid (accounted) allocation.
+	 */
+	if (!memcg)
+		return;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+	memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+	mem_cgroup_put(memcg);
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 #define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
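
On the free side, the requirement is symmetric: accounted pages must pass through memcg_kmem_uncharge_pages() before going back to the page allocator, so that the memcg recovered from the page_cgroup is uncharged and the reference taken by mem_cgroup_get() at charge time is dropped; this pairing is what lets accounted pages safely outlive the removal of their cgroup. A minimal sketch of the free-side counterpart, again with a hypothetical helper name (free_accounted_pages is not part of this patch):

/*
 * Hypothetical illustration only: the free-side counterpart to the
 * charge/commit sketch above. No memcg argument is needed, because
 * __memcg_kmem_uncharge_pages() recovers it from the page_cgroup and
 * drops the reference taken at charge time.
 */
static void free_accounted_pages(struct page *page, int order)
{
	memcg_kmem_uncharge_pages(page, order);
	__free_pages(page, order);
}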