author	Michal Hocko <mhocko@suse.cz>	2013-09-12 18:13:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-12 18:38:00 -0400
commit	e883110aad718b65de658db77387aaa69cce996d (patch)
tree	c0086d72e42f19f2cd29a5745f34525e761436ca /mm
parent	3b38722efd9f66da63bbbd41520c2e6fa9db3d68 (diff)
memcg: get rid of soft-limit tree infrastructure
Now that the soft limit is integrated into reclaim directly, the whole
soft-limit tree infrastructure is not needed anymore.  Rip it out.

Signed-off-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Glauber Costa <glommer@openvz.org>
Reviewed-by: Tejun Heo <tj@kernel.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
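Editor's note, a minimal sketch of the idea the message alludes to: with the per-zone RB-tree gone, soft-limit handling reduces to a predicate that reclaim can consult while walking the memcg hierarchy, instead of maintaining and querying a tree of over-limit groups. The helper name below is hypothetical and not part of this patch; only the two calls it uses, parent_mem_cgroup() and res_counter_soft_limit_excess(), are taken from the code visible in this diff. The actual predicate used by the series lives in the parent commit.

/*
 * Illustrative only -- not the mainline implementation.
 * A memcg deserves extra reclaim pressure if it, or any of its
 * ancestors, currently exceeds its soft limit.  This is the shape of
 * the check that replaces the per-zone RB-tree lookup.
 */
static bool memcg_over_soft_limit_sketch(struct mem_cgroup *memcg)
{
	for (; memcg; memcg = parent_mem_cgroup(memcg))
		if (res_counter_soft_limit_excess(&memcg->res))
			return true;
	return false;
}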
Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c	265
1 file changed, 2 insertions(+), 263 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6c32271a31c5..87a448dd9c10 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -39,7 +39,6 @@
 #include <linux/limits.h>
 #include <linux/export.h>
 #include <linux/mutex.h>
-#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
@@ -139,7 +138,6 @@ static const char * const mem_cgroup_lru_names[] = {
  */
 enum mem_cgroup_events_target {
 	MEM_CGROUP_TARGET_THRESH,
-	MEM_CGROUP_TARGET_SOFTLIMIT,
 	MEM_CGROUP_TARGET_NUMAINFO,
 	MEM_CGROUP_NTARGETS,
 };
@@ -175,10 +173,6 @@ struct mem_cgroup_per_zone {
 
 	struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
 
-	struct rb_node		tree_node;	/* RB tree node */
-	unsigned long long	usage_in_excess;/* Set to the value by which */
-						/* the soft limit is exceeded*/
-	bool			on_tree;
 	struct mem_cgroup	*memcg;		/* Back pointer, we cannot */
 						/* use container_of	   */
 };
@@ -187,26 +181,6 @@ struct mem_cgroup_per_node {
 	struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
 };
 
-/*
- * Cgroups above their limits are maintained in a RB-Tree, independent of
- * their hierarchy representation
- */
-
-struct mem_cgroup_tree_per_zone {
-	struct rb_root rb_root;
-	spinlock_t lock;
-};
-
-struct mem_cgroup_tree_per_node {
-	struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
-};
-
-struct mem_cgroup_tree {
-	struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
-};
-
-static struct mem_cgroup_tree soft_limit_tree __read_mostly;
-
 struct mem_cgroup_threshold {
 	struct eventfd_ctx *eventfd;
 	u64 threshold;
@@ -444,7 +418,6 @@ static bool move_file(void)
  * limit reclaim to prevent infinite loops, if they ever occur.
  */
 #define	MEM_CGROUP_MAX_RECLAIM_LOOPS		100
-#define	MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS	2
 
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
@@ -671,164 +644,6 @@ page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
 	return mem_cgroup_zoneinfo(memcg, nid, zid);
 }
 
-static struct mem_cgroup_tree_per_zone *
-soft_limit_tree_node_zone(int nid, int zid)
-{
-	return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
-}
-
-static struct mem_cgroup_tree_per_zone *
-soft_limit_tree_from_page(struct page *page)
-{
-	int nid = page_to_nid(page);
-	int zid = page_zonenum(page);
-
-	return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
-}
-
-static void
-__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
-				struct mem_cgroup_per_zone *mz,
-				struct mem_cgroup_tree_per_zone *mctz,
-				unsigned long long new_usage_in_excess)
-{
-	struct rb_node **p = &mctz->rb_root.rb_node;
-	struct rb_node *parent = NULL;
-	struct mem_cgroup_per_zone *mz_node;
-
-	if (mz->on_tree)
-		return;
-
-	mz->usage_in_excess = new_usage_in_excess;
-	if (!mz->usage_in_excess)
-		return;
-	while (*p) {
-		parent = *p;
-		mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
-					tree_node);
-		if (mz->usage_in_excess < mz_node->usage_in_excess)
-			p = &(*p)->rb_left;
-		/*
-		 * We can't avoid mem cgroups that are over their soft
-		 * limit by the same amount
-		 */
-		else if (mz->usage_in_excess >= mz_node->usage_in_excess)
-			p = &(*p)->rb_right;
-	}
-	rb_link_node(&mz->tree_node, parent, p);
-	rb_insert_color(&mz->tree_node, &mctz->rb_root);
-	mz->on_tree = true;
-}
-
-static void
-__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
-				struct mem_cgroup_per_zone *mz,
-				struct mem_cgroup_tree_per_zone *mctz)
-{
-	if (!mz->on_tree)
-		return;
-	rb_erase(&mz->tree_node, &mctz->rb_root);
-	mz->on_tree = false;
-}
-
-static void
-mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
-				struct mem_cgroup_per_zone *mz,
-				struct mem_cgroup_tree_per_zone *mctz)
-{
-	spin_lock(&mctz->lock);
-	__mem_cgroup_remove_exceeded(memcg, mz, mctz);
-	spin_unlock(&mctz->lock);
-}
-
-
-static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
-{
-	unsigned long long excess;
-	struct mem_cgroup_per_zone *mz;
-	struct mem_cgroup_tree_per_zone *mctz;
-	int nid = page_to_nid(page);
-	int zid = page_zonenum(page);
-	mctz = soft_limit_tree_from_page(page);
-
-	/*
-	 * Necessary to update all ancestors when hierarchy is used.
-	 * because their event counter is not touched.
-	 */
-	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-		mz = mem_cgroup_zoneinfo(memcg, nid, zid);
-		excess = res_counter_soft_limit_excess(&memcg->res);
-		/*
-		 * We have to update the tree if mz is on RB-tree or
-		 * mem is over its softlimit.
-		 */
-		if (excess || mz->on_tree) {
-			spin_lock(&mctz->lock);
-			/* if on-tree, remove it */
-			if (mz->on_tree)
-				__mem_cgroup_remove_exceeded(memcg, mz, mctz);
-			/*
-			 * Insert again. mz->usage_in_excess will be updated.
-			 * If excess is 0, no tree ops.
-			 */
-			__mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
-			spin_unlock(&mctz->lock);
-		}
-	}
-}
-
-static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
-{
-	int node, zone;
-	struct mem_cgroup_per_zone *mz;
-	struct mem_cgroup_tree_per_zone *mctz;
-
-	for_each_node(node) {
-		for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-			mz = mem_cgroup_zoneinfo(memcg, node, zone);
-			mctz = soft_limit_tree_node_zone(node, zone);
-			mem_cgroup_remove_exceeded(memcg, mz, mctz);
-		}
-	}
-}
-
-static struct mem_cgroup_per_zone *
-__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
-{
-	struct rb_node *rightmost = NULL;
-	struct mem_cgroup_per_zone *mz;
-
-retry:
-	mz = NULL;
-	rightmost = rb_last(&mctz->rb_root);
-	if (!rightmost)
-		goto done;		/* Nothing to reclaim from */
-
-	mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
-	/*
-	 * Remove the node now but someone else can add it back,
-	 * we will to add it back at the end of reclaim to its correct
-	 * position in the tree.
-	 */
-	__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
-	if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
-		!css_tryget(&mz->memcg->css))
-		goto retry;
-done:
-	return mz;
-}
-
-static struct mem_cgroup_per_zone *
-mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
-{
-	struct mem_cgroup_per_zone *mz;
-
-	spin_lock(&mctz->lock);
-	mz = __mem_cgroup_largest_soft_limit_node(mctz);
-	spin_unlock(&mctz->lock);
-	return mz;
-}
-
 /*
  * Implementation Note: reading percpu statistics for memcg.
  *
@@ -987,9 +802,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 	case MEM_CGROUP_TARGET_THRESH:
 		next = val + THRESHOLDS_EVENTS_TARGET;
 		break;
-	case MEM_CGROUP_TARGET_SOFTLIMIT:
-		next = val + SOFTLIMIT_EVENTS_TARGET;
-		break;
 	case MEM_CGROUP_TARGET_NUMAINFO:
 		next = val + NUMAINFO_EVENTS_TARGET;
 		break;
@@ -1012,11 +824,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 	/* threshold event is triggered in finer grain than soft limit */
 	if (unlikely(mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_THRESH))) {
-		bool do_softlimit;
 		bool do_numainfo __maybe_unused;
 
-		do_softlimit = mem_cgroup_event_ratelimit(memcg,
-						MEM_CGROUP_TARGET_SOFTLIMIT);
 #if MAX_NUMNODES > 1
 		do_numainfo = mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_NUMAINFO);
@@ -1024,8 +833,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 		preempt_enable();
 
 		mem_cgroup_threshold(memcg);
-		if (unlikely(do_softlimit))
-			mem_cgroup_update_tree(memcg, page);
 #if MAX_NUMNODES > 1
 		if (unlikely(do_numainfo))
 			atomic_inc(&memcg->numainfo_events);
@@ -1867,6 +1674,7 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
 	return total;
 }
 
+#if MAX_NUMNODES > 1
 /**
  * test_mem_cgroup_node_reclaimable
  * @memcg: the target memcg
@@ -1889,7 +1697,6 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
 	return false;
 
 }
-#if MAX_NUMNODES > 1
 
 /*
  * Always updating the nodemask is not very good - even if we have an empty
@@ -1957,51 +1764,12 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 	return node;
 }
 
-/*
- * Check all nodes whether it contains reclaimable pages or not.
- * For quick scan, we make use of scan_nodes. This will allow us to skip
- * unused nodes. But scan_nodes is lazily updated and may not cotain
- * enough new information. We need to do double check.
- */
-static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
-{
-	int nid;
-
-	/*
-	 * quick check...making use of scan_node.
-	 * We can skip unused nodes.
-	 */
-	if (!nodes_empty(memcg->scan_nodes)) {
-		for (nid = first_node(memcg->scan_nodes);
-		     nid < MAX_NUMNODES;
-		     nid = next_node(nid, memcg->scan_nodes)) {
-
-			if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
-				return true;
-		}
-	}
-	/*
-	 * Check rest of nodes.
-	 */
-	for_each_node_state(nid, N_MEMORY) {
-		if (node_isset(nid, memcg->scan_nodes))
-			continue;
-		if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
-			return true;
-	}
-	return false;
-}
-
 #else
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 {
 	return 0;
 }
 
-static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
-{
-	return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
-}
 #endif
 
 /*
@@ -2876,9 +2644,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	unlock_page_cgroup(pc);
 
 	/*
-	 * "charge_statistics" updated event counter. Then, check it.
-	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
-	 * if they exceeds softlimit.
+	 * "charge_statistics" updated event counter.
 	 */
 	memcg_check_events(memcg, page);
 }
@@ -5962,8 +5728,6 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 		mz = &pn->zoneinfo[zone];
 		lruvec_init(&mz->lruvec);
-		mz->usage_in_excess = 0;
-		mz->on_tree = false;
 		mz->memcg = memcg;
 	}
 	memcg->nodeinfo[node] = pn;
@@ -6019,7 +5783,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 	int node;
 	size_t size = memcg_size();
 
-	mem_cgroup_remove_from_trees(memcg);
 	free_css_id(&mem_cgroup_subsys, &memcg->css);
 
 	for_each_node(node)
@@ -6056,29 +5819,6 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(parent_mem_cgroup);
 
-static void __init mem_cgroup_soft_limit_tree_init(void)
-{
-	struct mem_cgroup_tree_per_node *rtpn;
-	struct mem_cgroup_tree_per_zone *rtpz;
-	int tmp, node, zone;
-
-	for_each_node(node) {
-		tmp = node;
-		if (!node_state(node, N_NORMAL_MEMORY))
-			tmp = -1;
-		rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
-		BUG_ON(!rtpn);
-
-		soft_limit_tree.rb_tree_per_node[node] = rtpn;
-
-		for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-			rtpz = &rtpn->rb_tree_per_zone[zone];
-			rtpz->rb_root = RB_ROOT;
-			spin_lock_init(&rtpz->lock);
-		}
-	}
-}
-
 static struct cgroup_subsys_state * __ref
 mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
@@ -6859,7 +6599,6 @@ static int __init mem_cgroup_init(void)
 {
 	hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
 	enable_swap_cgroup();
-	mem_cgroup_soft_limit_tree_init();
 	memcg_stock_init();
 	return 0;
 }