| author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2010-10-27 18:33:41 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-27 21:03:09 -0400 |
| commit | 7d74b06f240f1bd1b4b68dd6fe84164d8bf4e315 (patch) | |
| tree | 3b52304b322390a9b5782b4d498e5e0a14e3266b | |
| parent | 32047e2a85f06633ee4c53e2d0346fbcd34e480b (diff) | |
memcg: use for_each_mem_cgroup
In memory cgroup management, we sometimes have to walk through a
subhierarchy of cgroups to gather information, take locks, and so on.
Today this is done with mem_cgroup_walk_tree(), which calls a given
callback function for each cgroup it finds. The drawback is that the
callback must take a fixed-style "void *" argument, which adds a lot of
type casting to memcontrol.c.
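
For illustration, this is the shape of the pattern being removed, taken
from the child-counting helper that the patch deletes; the opaque
"void *" has to be converted back to the real type inside every
callback:

	static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
	{
		int *val = data;	/* recover the real type from void * */
		(*val)++;
		return 0;
	}

	mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb);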
To make the code cleaner, this patch replaces walk_tree() with an
iterator-style call:

	for_each_mem_cgroup_tree(iter, root)

The good point is that the iterator does not have to assume anything
about the code run inside the loop. The bad point is that it may leak a
reference count if a caller mistakenly uses "break" to leave the loop.

I think the benefit is larger. The modified code is straightforward and
easy to read because there are no mysterious callbacks and pointer
casts.
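
For reference, the same counting walk becomes a plain loop, and a walk
that must stop early is expected to clear a condition flag for the
_cond variant instead of using "break", so the iterator can still drop
its css reference on the way out. A minimal sketch (the stop predicate
is made up for illustration):

	struct mem_cgroup *iter;
	int num = 0;
	bool cond = true;

	/* straight conversion of mem_cgroup_count_children() */
	for_each_mem_cgroup_tree(iter, mem)
		num++;

	/* early exit: set "cond" to false and continue; do not "break" */
	for_each_mem_cgroup_tree_cond(iter, mem, cond)
		if (some_stop_condition(iter))	/* hypothetical predicate */
			cond = false;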
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	mm/memcontrol.c	170
1 files changed, 83 insertions, 87 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dd845d25827a..5e7a14d117c7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -660,40 +660,57 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return mem;
 }
 
-/*
- * Call callback function against all cgroup under hierarchy tree.
- */
-static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
-			  int (*func)(struct mem_cgroup *, void *))
+/* The caller has to guarantee "mem" exists before calling this */
+static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
 {
-	int found, ret, nextid;
+	if (mem && css_tryget(&mem->css))
+		return mem;
+	return NULL;
+}
+
+static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
+					struct mem_cgroup *root,
+					bool cond)
+{
+	int nextid = css_id(&iter->css) + 1;
+	int found;
+	int hierarchy_used;
 	struct cgroup_subsys_state *css;
-	struct mem_cgroup *mem;
 
-	if (!root->use_hierarchy)
-		return (*func)(root, data);
+	hierarchy_used = iter->use_hierarchy;
 
-	nextid = 1;
-	do {
-		ret = 0;
-		mem = NULL;
+	css_put(&iter->css);
+	if (!cond || !hierarchy_used)
+		return NULL;
 
+	do {
+		iter = NULL;
 		rcu_read_lock();
-		css = css_get_next(&mem_cgroup_subsys, nextid, &root->css,
-				   &found);
+
+		css = css_get_next(&mem_cgroup_subsys, nextid,
+				&root->css, &found);
 		if (css && css_tryget(css))
-			mem = container_of(css, struct mem_cgroup, css);
+			iter = container_of(css, struct mem_cgroup, css);
 		rcu_read_unlock();
-
-		if (mem) {
-			ret = (*func)(mem, data);
-			css_put(&mem->css);
-		}
+		/* If css is NULL, no more cgroups will be found */
 		nextid = found + 1;
-	} while (!ret && css);
+	} while (css && !iter);
 
-	return ret;
+	return iter;
 }
+/*
+ * for_eacn_mem_cgroup_tree() for visiting all cgroup under tree. Please
+ * be careful that "break" loop is not allowed. We have reference count.
+ * Instead of that modify "cond" to be false and "continue" to exit the loop.
+ */
+#define for_each_mem_cgroup_tree_cond(iter, root, cond)	\
+	for (iter = mem_cgroup_start_loop(root);\
+	     iter != NULL;\
+	     iter = mem_cgroup_get_next(iter, root, cond))
+
+#define for_each_mem_cgroup_tree(iter, root)	\
+	for_each_mem_cgroup_tree_cond(iter, root, true)
+
 
 static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
 {
@@ -1132,13 +1149,6 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem)
 	return false;
 }
 
-static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
-{
-	int *val = data;
-	(*val)++;
-	return 0;
-}
-
 /**
  * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
  * @memcg: The memory cgroup that went over limit
@@ -1213,7 +1223,10 @@ done:
 static int mem_cgroup_count_children(struct mem_cgroup *mem)
 {
 	int num = 0;
-	mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb);
+	struct mem_cgroup *iter;
+
+	for_each_mem_cgroup_tree(iter, mem)
+		num++;
 	return num;
 }
 
@@ -1362,49 +1375,39 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 	return total;
 }
 
-static int mem_cgroup_oom_lock_cb(struct mem_cgroup *mem, void *data)
-{
-	int *val = (int *)data;
-	int x;
-	/*
-	 * Logically, we can stop scanning immediately when we find
-	 * a memcg is already locked. But condidering unlock ops and
-	 * creation/removal of memcg, scan-all is simple operation.
-	 */
-	x = atomic_inc_return(&mem->oom_lock);
-	*val = max(x, *val);
-	return 0;
-}
 /*
  * Check OOM-Killer is already running under our hierarchy.
  * If someone is running, return false.
  */
 static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
 {
-	int lock_count = 0;
+	int x, lock_count = 0;
+	struct mem_cgroup *iter;
 
-	mem_cgroup_walk_tree(mem, &lock_count, mem_cgroup_oom_lock_cb);
+	for_each_mem_cgroup_tree(iter, mem) {
+		x = atomic_inc_return(&iter->oom_lock);
+		lock_count = max(x, lock_count);
+	}
 
 	if (lock_count == 1)
 		return true;
 	return false;
 }
 
-static int mem_cgroup_oom_unlock_cb(struct mem_cgroup *mem, void *data)
+static int mem_cgroup_oom_unlock(struct mem_cgroup *mem)
 {
+	struct mem_cgroup *iter;
+
 	/*
 	 * When a new child is created while the hierarchy is under oom,
 	 * mem_cgroup_oom_lock() may not be called. We have to use
 	 * atomic_add_unless() here.
 	 */
-	atomic_add_unless(&mem->oom_lock, -1, 0);
+	for_each_mem_cgroup_tree(iter, mem)
+		atomic_add_unless(&iter->oom_lock, -1, 0);
 	return 0;
 }
 
-static void mem_cgroup_oom_unlock(struct mem_cgroup *mem)
-{
-	mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_unlock_cb);
-}
 
 static DEFINE_MUTEX(memcg_oom_mutex);
 static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
@@ -3207,33 +3210,25 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
 	return retval;
 }
 
-struct mem_cgroup_idx_data {
-	s64 val;
-	enum mem_cgroup_stat_index idx;
-};
 
-static int
-mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
+static u64 mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
+				enum mem_cgroup_stat_index idx)
 {
-	struct mem_cgroup_idx_data *d = data;
-	d->val += mem_cgroup_read_stat(mem, d->idx);
-	return 0;
-}
+	struct mem_cgroup *iter;
+	s64 val = 0;
 
-static void
-mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
-				enum mem_cgroup_stat_index idx, s64 *val)
-{
-	struct mem_cgroup_idx_data d;
-	d.idx = idx;
-	d.val = 0;
-	mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat);
-	*val = d.val;
+	/* each per cpu's value can be minus.Then, use s64 */
+	for_each_mem_cgroup_tree(iter, mem)
+		val += mem_cgroup_read_stat(iter, idx);
+
+	if (val < 0) /* race ? */
+		val = 0;
+	return val;
 }
 
 static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
 {
-	u64 idx_val, val;
+	u64 val;
 
 	if (!mem_cgroup_is_root(mem)) {
 		if (!swap)
@@ -3242,16 +3237,12 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
 			return res_counter_read_u64(&mem->memsw, RES_USAGE);
 	}
 
-	mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE, &idx_val);
-	val = idx_val;
-	mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS, &idx_val);
-	val += idx_val;
+	val = mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE);
+	val += mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS);
 
-	if (swap) {
-		mem_cgroup_get_recursive_idx_stat(mem,
-				MEM_CGROUP_STAT_SWAPOUT, &idx_val);
-		val += idx_val;
-	}
+	if (swap)
+		val += mem_cgroup_get_recursive_idx_stat(mem,
+						MEM_CGROUP_STAT_SWAPOUT);
 
 	return val << PAGE_SHIFT;
 }
@@ -3459,9 +3450,9 @@ struct {
 };
 
 
-static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
+static void
+mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
 {
-	struct mcs_total_stat *s = data;
 	s64 val;
 
 	/* per cpu stat */
@@ -3491,13 +3482,15 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
 	s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
 	val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE);
 	s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
-	return 0;
 }
 
 static void
 mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
 {
-	mem_cgroup_walk_tree(mem, s, mem_cgroup_get_local_stat);
+	struct mem_cgroup *iter;
+
+	for_each_mem_cgroup_tree(iter, mem)
+		mem_cgroup_get_local_stat(iter, s);
 }
 
 static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
@@ -3674,7 +3667,7 @@ static int compare_thresholds(const void *a, const void *b)
 	return _a->threshold - _b->threshold;
 }
 
-static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data)
+static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem)
 {
 	struct mem_cgroup_eventfd_list *ev;
 
@@ -3685,7 +3678,10 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data)
 
 static void mem_cgroup_oom_notify(struct mem_cgroup *mem)
 {
-	mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_notify_cb);
+	struct mem_cgroup *iter;
+
+	for_each_mem_cgroup_tree(iter, mem)
+		mem_cgroup_oom_notify_cb(iter);
 }
 
 static int mem_cgroup_usage_register_event(struct cgroup *cgrp,
