aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c170
1 files changed, 83 insertions, 87 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dd845d25827a..5e7a14d117c7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -660,40 +660,57 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
660 return mem; 660 return mem;
661} 661}
662 662
663/* 663/* The caller has to guarantee "mem" exists before calling this */
664 * Call callback function against all cgroup under hierarchy tree. 664static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
665 */
666static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
667 int (*func)(struct mem_cgroup *, void *))
668{ 665{
669 int found, ret, nextid; 666 if (mem && css_tryget(&mem->css))
667 return mem;
668 return NULL;
669}
670
671static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
672 struct mem_cgroup *root,
673 bool cond)
674{
675 int nextid = css_id(&iter->css) + 1;
676 int found;
677 int hierarchy_used;
670 struct cgroup_subsys_state *css; 678 struct cgroup_subsys_state *css;
671 struct mem_cgroup *mem;
672 679
673 if (!root->use_hierarchy) 680 hierarchy_used = iter->use_hierarchy;
674 return (*func)(root, data);
675 681
676 nextid = 1; 682 css_put(&iter->css);
677 do { 683 if (!cond || !hierarchy_used)
678 ret = 0; 684 return NULL;
679 mem = NULL;
680 685
686 do {
687 iter = NULL;
681 rcu_read_lock(); 688 rcu_read_lock();
682 css = css_get_next(&mem_cgroup_subsys, nextid, &root->css, 689
683 &found); 690 css = css_get_next(&mem_cgroup_subsys, nextid,
691 &root->css, &found);
684 if (css && css_tryget(css)) 692 if (css && css_tryget(css))
685 mem = container_of(css, struct mem_cgroup, css); 693 iter = container_of(css, struct mem_cgroup, css);
686 rcu_read_unlock(); 694 rcu_read_unlock();
687 695 /* If css is NULL, no more cgroups will be found */
688 if (mem) {
689 ret = (*func)(mem, data);
690 css_put(&mem->css);
691 }
692 nextid = found + 1; 696 nextid = found + 1;
693 } while (!ret && css); 697 } while (css && !iter);
694 698
695 return ret; 699 return iter;
696} 700}
701/*
702 * for_each_mem_cgroup_tree() visits every cgroup under the hierarchy tree.
703 * Do not "break" out of the loop: the iterator holds a css reference that is
704 * dropped only in mem_cgroup_get_next(). To exit early, set "cond" to false and "continue".
705 */
706#define for_each_mem_cgroup_tree_cond(iter, root, cond) \
707 for (iter = mem_cgroup_start_loop(root);\
708 iter != NULL;\
709 iter = mem_cgroup_get_next(iter, root, cond))
710
711#define for_each_mem_cgroup_tree(iter, root) \
712 for_each_mem_cgroup_tree_cond(iter, root, true)
713
697 714
698static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) 715static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
699{ 716{
@@ -1132,13 +1149,6 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem)
1132 return false; 1149 return false;
1133} 1150}
1134 1151
1135static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
1136{
1137 int *val = data;
1138 (*val)++;
1139 return 0;
1140}
1141
1142/** 1152/**
1143 * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode. 1153 * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
1144 * @memcg: The memory cgroup that went over limit 1154 * @memcg: The memory cgroup that went over limit
@@ -1213,7 +1223,10 @@ done:
1213static int mem_cgroup_count_children(struct mem_cgroup *mem) 1223static int mem_cgroup_count_children(struct mem_cgroup *mem)
1214{ 1224{
1215 int num = 0; 1225 int num = 0;
1216 mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb); 1226 struct mem_cgroup *iter;
1227
1228 for_each_mem_cgroup_tree(iter, mem)
1229 num++;
1217 return num; 1230 return num;
1218} 1231}
1219 1232
@@ -1362,49 +1375,39 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1362 return total; 1375 return total;
1363} 1376}
1364 1377
1365static int mem_cgroup_oom_lock_cb(struct mem_cgroup *mem, void *data)
1366{
1367 int *val = (int *)data;
1368 int x;
1369 /*
1370 * Logically, we can stop scanning immediately when we find
1371 * a memcg is already locked. But considering unlock ops and
1372 * creation/removal of memcg, scanning all of them is the simpler operation.
1373 */
1374 x = atomic_inc_return(&mem->oom_lock);
1375 *val = max(x, *val);
1376 return 0;
1377}
1378/* 1378/*
1379 * Check OOM-Killer is already running under our hierarchy. 1379 * Check OOM-Killer is already running under our hierarchy.
1380 * If someone is running, return false. 1380 * If someone is running, return false.
1381 */ 1381 */
1382static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) 1382static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
1383{ 1383{
1384 int lock_count = 0; 1384 int x, lock_count = 0;
1385 struct mem_cgroup *iter;
1385 1386
1386 mem_cgroup_walk_tree(mem, &lock_count, mem_cgroup_oom_lock_cb); 1387 for_each_mem_cgroup_tree(iter, mem) {
1388 x = atomic_inc_return(&iter->oom_lock);
1389 lock_count = max(x, lock_count);
1390 }
1387 1391
1388 if (lock_count == 1) 1392 if (lock_count == 1)
1389 return true; 1393 return true;
1390 return false; 1394 return false;
1391} 1395}
1392 1396
1393static int mem_cgroup_oom_unlock_cb(struct mem_cgroup *mem, void *data) 1397static int mem_cgroup_oom_unlock(struct mem_cgroup *mem)
1394{ 1398{
1399 struct mem_cgroup *iter;
1400
1395 /* 1401 /*
1396 * When a new child is created while the hierarchy is under oom, 1402 * When a new child is created while the hierarchy is under oom,
1397 * mem_cgroup_oom_lock() may not be called. We have to use 1403 * mem_cgroup_oom_lock() may not be called. We have to use
1398 * atomic_add_unless() here. 1404 * atomic_add_unless() here.
1399 */ 1405 */
1400 atomic_add_unless(&mem->oom_lock, -1, 0); 1406 for_each_mem_cgroup_tree(iter, mem)
1407 atomic_add_unless(&iter->oom_lock, -1, 0);
1401 return 0; 1408 return 0;
1402} 1409}
1403 1410
1404static void mem_cgroup_oom_unlock(struct mem_cgroup *mem)
1405{
1406 mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_unlock_cb);
1407}
1408 1411
1409static DEFINE_MUTEX(memcg_oom_mutex); 1412static DEFINE_MUTEX(memcg_oom_mutex);
1410static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq); 1413static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
@@ -3207,33 +3210,25 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
3207 return retval; 3210 return retval;
3208} 3211}
3209 3212
3210struct mem_cgroup_idx_data {
3211 s64 val;
3212 enum mem_cgroup_stat_index idx;
3213};
3214 3213
3215static int 3214static u64 mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
3216mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data) 3215 enum mem_cgroup_stat_index idx)
3217{ 3216{
3218 struct mem_cgroup_idx_data *d = data; 3217 struct mem_cgroup *iter;
3219 d->val += mem_cgroup_read_stat(mem, d->idx); 3218 s64 val = 0;
3220 return 0;
3221}
3222 3219
3223static void 3220 /* each per-cpu value can be negative, so accumulate in s64 */
3224mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, 3221 for_each_mem_cgroup_tree(iter, mem)
3225 enum mem_cgroup_stat_index idx, s64 *val) 3222 val += mem_cgroup_read_stat(iter, idx);
3226{ 3223
3227 struct mem_cgroup_idx_data d; 3224 if (val < 0) /* race ? */
3228 d.idx = idx; 3225 val = 0;
3229 d.val = 0; 3226 return val;
3230 mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat);
3231 *val = d.val;
3232} 3227}
3233 3228
3234static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) 3229static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
3235{ 3230{
3236 u64 idx_val, val; 3231 u64 val;
3237 3232
3238 if (!mem_cgroup_is_root(mem)) { 3233 if (!mem_cgroup_is_root(mem)) {
3239 if (!swap) 3234 if (!swap)
@@ -3242,16 +3237,12 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
3242 return res_counter_read_u64(&mem->memsw, RES_USAGE); 3237 return res_counter_read_u64(&mem->memsw, RES_USAGE);
3243 } 3238 }
3244 3239
3245 mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE, &idx_val); 3240 val = mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE);
3246 val = idx_val; 3241 val += mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS);
3247 mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS, &idx_val);
3248 val += idx_val;
3249 3242
3250 if (swap) { 3243 if (swap)
3251 mem_cgroup_get_recursive_idx_stat(mem, 3244 val += mem_cgroup_get_recursive_idx_stat(mem,
3252 MEM_CGROUP_STAT_SWAPOUT, &idx_val); 3245 MEM_CGROUP_STAT_SWAPOUT);
3253 val += idx_val;
3254 }
3255 3246
3256 return val << PAGE_SHIFT; 3247 return val << PAGE_SHIFT;
3257} 3248}
@@ -3459,9 +3450,9 @@ struct {
3459}; 3450};
3460 3451
3461 3452
3462static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) 3453static void
3454mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
3463{ 3455{
3464 struct mcs_total_stat *s = data;
3465 s64 val; 3456 s64 val;
3466 3457
3467 /* per cpu stat */ 3458 /* per cpu stat */
@@ -3491,13 +3482,15 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
3491 s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; 3482 s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
3492 val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE); 3483 val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE);
3493 s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; 3484 s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
3494 return 0;
3495} 3485}
3496 3486
3497static void 3487static void
3498mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) 3488mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
3499{ 3489{
3500 mem_cgroup_walk_tree(mem, s, mem_cgroup_get_local_stat); 3490 struct mem_cgroup *iter;
3491
3492 for_each_mem_cgroup_tree(iter, mem)
3493 mem_cgroup_get_local_stat(iter, s);
3501} 3494}
3502 3495
3503static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, 3496static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
@@ -3674,7 +3667,7 @@ static int compare_thresholds(const void *a, const void *b)
3674 return _a->threshold - _b->threshold; 3667 return _a->threshold - _b->threshold;
3675} 3668}
3676 3669
3677static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data) 3670static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem)
3678{ 3671{
3679 struct mem_cgroup_eventfd_list *ev; 3672 struct mem_cgroup_eventfd_list *ev;
3680 3673
@@ -3685,7 +3678,10 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data)
3685 3678
3686static void mem_cgroup_oom_notify(struct mem_cgroup *mem) 3679static void mem_cgroup_oom_notify(struct mem_cgroup *mem)
3687{ 3680{
3688 mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_notify_cb); 3681 struct mem_cgroup *iter;
3682
3683 for_each_mem_cgroup_tree(iter, mem)
3684 mem_cgroup_oom_notify_cb(iter);
3689} 3685}
3690 3686
3691static int mem_cgroup_usage_register_event(struct cgroup *cgrp, 3687static int mem_cgroup_usage_register_event(struct cgroup *cgrp,