 include/linux/memcontrol.h | 110 ++++++++++++
 mm/memcontrol.c            | 170 ++++++++++++++++
 2 files changed, 280 insertions(+), 0 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e98a74c0c9c0..afa2ad40457e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -21,6 +21,7 @@
 #define _LINUX_MEMCONTROL_H
 #include <linux/cgroup.h>
 #include <linux/vm_event_item.h>
+#include <linux/hardirq.h>
 
 struct mem_cgroup;
 struct page_cgroup;
@@ -414,5 +415,114 @@ static inline void sock_release_memcg(struct sock *sk)
 {
 }
 #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_kmem_enabled(void)
+{
+	return true;
+}
+
+/*
+ * In general, we'll do everything in our power not to incur any overhead
+ * for non-memcg users of the kmem functions. Not even a function call, if we
+ * can avoid it.
+ *
+ * Therefore, we'll inline all those functions so that in the best case, we'll
+ * see that kmemcg is off for everybody and proceed quickly. If it is on,
+ * we'll still do most of the flag checking inline. We check a lot of
+ * conditions, but because they are pretty simple, they are expected to be
+ * fast.
+ */
+bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
+					int order);
+void __memcg_kmem_commit_charge(struct page *page,
+				struct mem_cgroup *memcg, int order);
+void __memcg_kmem_uncharge_pages(struct page *page, int order);
+
+/**
+ * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
+ * @gfp: the gfp allocation flags.
+ * @memcg: a pointer to the memcg this was charged against.
+ * @order: allocation order.
+ *
+ * Returns true if the memcg to which the current task belongs can hold this
+ * allocation.
+ *
+ * We return true automatically if this allocation is not to be accounted to
+ * any memcg.
+ */
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	if (!memcg_kmem_enabled())
+		return true;
+
+	/*
+	 * __GFP_NOFAIL allocations will move on even if charging is not
+	 * possible. Therefore we don't even try, and have this allocation
+	 * unaccounted. We could in theory charge it with
+	 * res_counter_charge_nofail, but we hope those allocations are rare,
+	 * and won't be worth the trouble.
+	 */
+	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+		return true;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return true;
+
+	/* If the task is dying, just let it go. */
+	if (unlikely(fatal_signal_pending(current)))
+		return true;
+
+	return __memcg_kmem_newpage_charge(gfp, memcg, order);
+}
+
+/**
+ * memcg_kmem_uncharge_pages: uncharge pages from memcg
+ * @page: pointer to struct page being freed
+ * @order: allocation order.
+ *
+ * There is no need to specify memcg here, since it is embedded in page_cgroup.
+ */
+static inline void
+memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	if (memcg_kmem_enabled())
+		__memcg_kmem_uncharge_pages(page, order);
+}
+
+/**
+ * memcg_kmem_commit_charge: embeds correct memcg in a page
+ * @page: pointer to struct page recently allocated
+ * @memcg: the memcg structure we charged against
+ * @order: allocation order.
+ *
+ * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
+ * failure of the allocation. If @page is NULL, this function will revert the
+ * charges. Otherwise, it will commit the memcg given by @memcg to the
+ * corresponding page_cgroup.
+ */
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+	if (memcg_kmem_enabled() && memcg)
+		__memcg_kmem_commit_charge(page, memcg, order);
+}
+
+#else
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	return true;
+}
+
+static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+}
+
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
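The three inline entry points above are meant to bracket a kernel page allocation: check the charge before allocating, commit (or revert) it afterwards, and uncharge when the page is freed. As a rough illustration only (the helper below is hypothetical and not part of this patch), an allocation-side caller whose gfp mask already carries __GFP_KMEMCG could look like this:

	static struct page *alloc_pages_accounted(gfp_t gfp, int order)
	{
		struct mem_cgroup *memcg = NULL;
		struct page *page;

		/* Fail the allocation if the current task's memcg cannot hold it. */
		if (!memcg_kmem_newpage_charge(gfp, &memcg, order))
			return NULL;

		page = alloc_pages(gfp, order);

		/* Commit @memcg to the page, or revert the charge if page is NULL. */
		memcg_kmem_commit_charge(page, memcg, order);

		return page;
	}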
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bba1cb4bbb82..b9afa060b8d6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -10,6 +10,10 @@
  * Copyright (C) 2009 Nokia Corporation
  * Author: Kirill A. Shutemov
  *
+ * Kernel Memory Controller
+ * Copyright (C) 2012 Parallels Inc. and Google Inc.
+ * Authors: Glauber Costa and Suleiman Souhlal
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -2661,6 +2665,172 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	memcg_check_events(memcg, page);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
+{
+	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
+		(memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
+}
+
+static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+{
+	struct res_counter *fail_res;
+	struct mem_cgroup *_memcg;
+	int ret = 0;
+	bool may_oom;
+
+	ret = res_counter_charge(&memcg->kmem, size, &fail_res);
+	if (ret)
+		return ret;
+
+	/*
+	 * Conditions under which we can wait for the oom_killer. Those are
+	 * the same conditions tested by the core page allocator.
+	 */
+	may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
+
+	_memcg = memcg;
+	ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
+				      &_memcg, may_oom);
+
+	if (ret == -EINTR) {
+		/*
+		 * __mem_cgroup_try_charge() chose to bypass to root due to
+		 * OOM kill or fatal signal. Since our only options are to
+		 * either fail the allocation or charge it to this cgroup, do
+		 * it as a temporary condition. But we can't fail. From a
+		 * kmem/slab perspective, the cache has already been selected,
+		 * by mem_cgroup_kmem_get_cache(), so it is too late to change
+		 * our minds.
+		 *
+		 * This condition will only trigger if the task entered
+		 * memcg_charge_kmem in a sane state, but was OOM-killed during
+		 * __mem_cgroup_try_charge() above. Tasks that were already
+		 * dying when the allocation triggers should have been already
+		 * directed to the root cgroup in memcontrol.h.
+		 */
+		res_counter_charge_nofail(&memcg->res, size, &fail_res);
+		if (do_swap_account)
+			res_counter_charge_nofail(&memcg->memsw, size,
+						  &fail_res);
+		ret = 0;
+	} else if (ret)
+		res_counter_uncharge(&memcg->kmem, size);
+
+	return ret;
+}
+
+static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+{
+	res_counter_uncharge(&memcg->kmem, size);
+	res_counter_uncharge(&memcg->res, size);
+	if (do_swap_account)
+		res_counter_uncharge(&memcg->memsw, size);
+}
+
+/*
+ * We need to verify if the allocation against current->mm->owner's memcg is
+ * possible for the given order. But the page is not allocated yet, so we'll
+ * need a further commit step to do the final arrangements.
+ *
+ * It is possible for the task to switch cgroups in the meantime, so at
+ * commit time, we can't rely on task conversion any longer. We'll then use
+ * the handle argument to return to the caller which cgroup we should commit
+ * against. We could also return the memcg directly and avoid the pointer
+ * passing, but a boolean return value gives better semantics considering
+ * the compiled-out case as well.
+ *
+ * Returning true means the allocation is possible.
+ */
+bool
+__memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	*_memcg = NULL;
+	memcg = try_get_mem_cgroup_from_mm(current->mm);
+
+	/*
+	 * Very rare case described in mem_cgroup_from_task. Unfortunately
+	 * there isn't much we can do without complicating this too much,
+	 * and it would be gfp-dependent anyway. Just let it go.
+	 */
+	if (unlikely(!memcg))
+		return true;
+
+	if (!memcg_can_account_kmem(memcg)) {
+		css_put(&memcg->css);
+		return true;
+	}
+
+	mem_cgroup_get(memcg);
+
+	ret = memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order);
+	if (!ret)
+		*_memcg = memcg;
+	else
+		mem_cgroup_put(memcg);
+
+	css_put(&memcg->css);
+	return (ret == 0);
+}
+
+void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
+				int order)
+{
+	struct page_cgroup *pc;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+
+	/* The page allocation failed. Revert. */
+	if (!page) {
+		memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+		mem_cgroup_put(memcg);
+		return;
+	}
+
+	pc = lookup_page_cgroup(page);
+	lock_page_cgroup(pc);
+	pc->mem_cgroup = memcg;
+	SetPageCgroupUsed(pc);
+	unlock_page_cgroup(pc);
+}
+
+void __memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	struct mem_cgroup *memcg = NULL;
+	struct page_cgroup *pc;
+
+	pc = lookup_page_cgroup(page);
+	/*
+	 * Fast unlocked return. Theoretically might have changed, have to
+	 * check again after locking.
+	 */
+	if (!PageCgroupUsed(pc))
+		return;
+
+	lock_page_cgroup(pc);
+	if (PageCgroupUsed(pc)) {
+		memcg = pc->mem_cgroup;
+		ClearPageCgroupUsed(pc);
+	}
+	unlock_page_cgroup(pc);
+
+	/*
+	 * We trust that the allocation is kmem-accounted only if there is a
+	 * memcg associated with the page.
+	 */
+	if (!memcg)
+		return;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+	memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+	mem_cgroup_put(memcg);
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 #define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
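On the free side, the memcg is recovered from the page's page_cgroup, so only the page and its order are needed. A minimal, hypothetical sketch of a free-path counterpart to the allocation helper shown earlier (again, not part of this patch):

	static void free_pages_accounted(struct page *page, int order)
	{
		/* No-op unless the page was committed to a memcg at allocation time. */
		memcg_kmem_uncharge_pages(page, order);
		__free_pages(page, order);
	}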