Diffstat (limited to 'mm')
-rw-r--r--   mm/memcontrol.c   170
1 files changed, 83 insertions, 87 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dd845d25827a..5e7a14d117c7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -660,40 +660,57 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return mem;
 }
 
-/*
- * Call callback function against all cgroup under hierarchy tree.
- */
-static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
-			int (*func)(struct mem_cgroup *, void *))
+/* The caller has to guarantee "mem" exists before calling this */
+static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
 {
-	int found, ret, nextid;
+	if (mem && css_tryget(&mem->css))
+		return mem;
+	return NULL;
+}
+
+static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
+					struct mem_cgroup *root,
+					bool cond)
+{
+	int nextid = css_id(&iter->css) + 1;
+	int found;
+	int hierarchy_used;
 	struct cgroup_subsys_state *css;
-	struct mem_cgroup *mem;
 
-	if (!root->use_hierarchy)
-		return (*func)(root, data);
+	hierarchy_used = iter->use_hierarchy;
 
-	nextid = 1;
-	do {
-		ret = 0;
-		mem = NULL;
+	css_put(&iter->css);
+	if (!cond || !hierarchy_used)
+		return NULL;
 
+	do {
+		iter = NULL;
 		rcu_read_lock();
-		css = css_get_next(&mem_cgroup_subsys, nextid, &root->css,
-				   &found);
+
+		css = css_get_next(&mem_cgroup_subsys, nextid,
+				&root->css, &found);
 		if (css && css_tryget(css))
-			mem = container_of(css, struct mem_cgroup, css);
+			iter = container_of(css, struct mem_cgroup, css);
 		rcu_read_unlock();
-
-		if (mem) {
-			ret = (*func)(mem, data);
-			css_put(&mem->css);
-		}
+		/* If css is NULL, no more cgroups will be found */
 		nextid = found + 1;
-	} while (!ret && css);
+	} while (css && !iter);
 
-	return ret;
+	return iter;
 }
+/*
+ * for_each_mem_cgroup_tree() visits all cgroups under the tree. Note that
+ * breaking out of the loop is not allowed because a reference count is held.
+ * Instead, set "cond" to false and "continue" to exit the loop.
+ */
+#define for_each_mem_cgroup_tree_cond(iter, root, cond)	\
+	for (iter = mem_cgroup_start_loop(root);\
+	     iter != NULL;\
+	     iter = mem_cgroup_get_next(iter, root, cond))
+
+#define for_each_mem_cgroup_tree(iter, root) \
+	for_each_mem_cgroup_tree_cond(iter, root, true)
+
 
 static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
 {
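The comment above forbids "break" because every step of the walk holds a css reference that mem_cgroup_get_next() is responsible for dropping. A minimal caller sketch of the cond-based exit follows; hierarchy_under_oom() is a hypothetical helper (not part of this patch) that only touches the oom_lock counter used elsewhere in this diff:

static bool hierarchy_under_oom(struct mem_cgroup *root)
{
	struct mem_cgroup *iter;
	bool cond = true;
	bool oom = false;

	/* Walk the whole subtree, but stop scanning once a locked memcg is seen. */
	for_each_mem_cgroup_tree_cond(iter, root, cond) {
		if (atomic_read(&iter->oom_lock)) {
			oom = true;
			cond = false;	/* no "break": the next step drops the reference and ends the loop */
		}
	}
	return oom;
}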
@@ -1132,13 +1149,6 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem)
 	return false;
 }
 
-static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
-{
-	int *val = data;
-	(*val)++;
-	return 0;
-}
-
 /**
  * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
  * @memcg: The memory cgroup that went over limit
@@ -1213,7 +1223,10 @@ done:
 static int mem_cgroup_count_children(struct mem_cgroup *mem)
 {
 	int num = 0;
-	mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb);
+	struct mem_cgroup *iter;
+
+	for_each_mem_cgroup_tree(iter, mem)
+		num++;
 	return num;
 }
 
@@ -1362,49 +1375,39 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 	return total;
 }
 
-static int mem_cgroup_oom_lock_cb(struct mem_cgroup *mem, void *data)
-{
-	int *val = (int *)data;
-	int x;
-	/*
-	 * Logically, we can stop scanning immediately when we find
-	 * a memcg is already locked. But condidering unlock ops and
-	 * creation/removal of memcg, scan-all is simple operation.
-	 */
-	x = atomic_inc_return(&mem->oom_lock);
-	*val = max(x, *val);
-	return 0;
-}
 /*
  * Check OOM-Killer is already running under our hierarchy.
  * If someone is running, return false.
  */
 static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
 {
-	int lock_count = 0;
+	int x, lock_count = 0;
+	struct mem_cgroup *iter;
 
-	mem_cgroup_walk_tree(mem, &lock_count, mem_cgroup_oom_lock_cb);
+	for_each_mem_cgroup_tree(iter, mem) {
+		x = atomic_inc_return(&iter->oom_lock);
+		lock_count = max(x, lock_count);
+	}
 
 	if (lock_count == 1)
 		return true;
 	return false;
 }
 
-static int mem_cgroup_oom_unlock_cb(struct mem_cgroup *mem, void *data)
+static int mem_cgroup_oom_unlock(struct mem_cgroup *mem)
 {
+	struct mem_cgroup *iter;
+
 	/*
 	 * When a new child is created while the hierarchy is under oom,
 	 * mem_cgroup_oom_lock() may not be called. We have to use
 	 * atomic_add_unless() here.
 	 */
-	atomic_add_unless(&mem->oom_lock, -1, 0);
+	for_each_mem_cgroup_tree(iter, mem)
+		atomic_add_unless(&iter->oom_lock, -1, 0);
 	return 0;
 }
 
-static void mem_cgroup_oom_unlock(struct mem_cgroup *mem)
-{
-	mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_unlock_cb);
-}
 
 static DEFINE_MUTEX(memcg_oom_mutex);
 static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
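For context, a hedged sketch of how the lock/unlock pair above is meant to be driven: callers serialize on memcg_oom_mutex (declared just above) and only proceed to the OOM killer when mem_cgroup_oom_lock() reports lock_count == 1, i.e. every memcg in the subtree went from 0 to 1 and nobody else held the lock. The function below is an illustrative assumption, not the kernel's actual OOM handler:

static bool example_memcg_oom_path(struct mem_cgroup *mem)
{
	bool locked;

	mutex_lock(&memcg_oom_mutex);
	locked = mem_cgroup_oom_lock(mem);
	mutex_unlock(&memcg_oom_mutex);

	if (locked) {
		/* this task owns the hierarchical OOM lock: kill something */
	} else {
		/* someone else is already handling OOM in this hierarchy: wait */
	}

	mutex_lock(&memcg_oom_mutex);
	mem_cgroup_oom_unlock(mem);
	mutex_unlock(&memcg_oom_mutex);
	return locked;
}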
@@ -3207,33 +3210,25 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
 	return retval;
 }
 
-struct mem_cgroup_idx_data {
-	s64 val;
-	enum mem_cgroup_stat_index idx;
-};
 
-static int
-mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
+static u64 mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
+				enum mem_cgroup_stat_index idx)
 {
-	struct mem_cgroup_idx_data *d = data;
-	d->val += mem_cgroup_read_stat(mem, d->idx);
-	return 0;
-}
+	struct mem_cgroup *iter;
+	s64 val = 0;
 
-static void
-mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
-				enum mem_cgroup_stat_index idx, s64 *val)
-{
-	struct mem_cgroup_idx_data d;
-	d.idx = idx;
-	d.val = 0;
-	mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat);
-	*val = d.val;
+	/* each per-cpu value can be negative, so use s64 */
+	for_each_mem_cgroup_tree(iter, mem)
+		val += mem_cgroup_read_stat(iter, idx);
+
+	if (val < 0) /* race ? */
+		val = 0;
+	return val;
 }
 
 static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
 {
-	u64 idx_val, val;
+	u64 val;
 
 	if (!mem_cgroup_is_root(mem)) {
 		if (!swap)
@@ -3242,16 +3237,12 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
 		return res_counter_read_u64(&mem->memsw, RES_USAGE);
 	}
 
-	mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE, &idx_val);
-	val = idx_val;
-	mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS, &idx_val);
-	val += idx_val;
+	val = mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE);
+	val += mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS);
 
-	if (swap) {
-		mem_cgroup_get_recursive_idx_stat(mem,
-				MEM_CGROUP_STAT_SWAPOUT, &idx_val);
-		val += idx_val;
-	}
+	if (swap)
+		val += mem_cgroup_get_recursive_idx_stat(mem,
+				MEM_CGROUP_STAT_SWAPOUT);
 
 	return val << PAGE_SHIFT;
 }
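For reference (not part of the patch): the helper returns page counts, and mem_cgroup_usage() converts to bytes with the final shift. With a typical 4KiB page size (PAGE_SHIFT == 12), a root hierarchy whose tree-wide sums are 100 cache pages and 50 RSS pages reports (100 + 50) << 12 = 614400 bytes.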
@@ -3459,9 +3450,9 @@ struct {
 };
 
 
-static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
+static void
+mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
 {
-	struct mcs_total_stat *s = data;
 	s64 val;
 
 	/* per cpu stat */
@@ -3491,13 +3482,15 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
 	s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
 	val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE);
 	s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
-	return 0;
 }
 
 static void
 mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
 {
-	mem_cgroup_walk_tree(mem, s, mem_cgroup_get_local_stat);
+	struct mem_cgroup *iter;
+
+	for_each_mem_cgroup_tree(iter, mem)
+		mem_cgroup_get_local_stat(iter, s);
 }
 
 static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
@@ -3674,7 +3667,7 @@ static int compare_thresholds(const void *a, const void *b)
 	return _a->threshold - _b->threshold;
 }
 
-static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data)
+static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem)
 {
 	struct mem_cgroup_eventfd_list *ev;
 
@@ -3685,7 +3678,10 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data)
 
 static void mem_cgroup_oom_notify(struct mem_cgroup *mem)
 {
-	mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_notify_cb);
+	struct mem_cgroup *iter;
+
+	for_each_mem_cgroup_tree(iter, mem)
+		mem_cgroup_oom_notify_cb(iter);
 }
 
 static int mem_cgroup_usage_register_event(struct cgroup *cgrp,