aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/cgroups/memory.txt56
-rw-r--r--mm/memcontrol.c97
2 files changed, 147 insertions, 6 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index b871f2552b45..e726fb0df719 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -262,10 +262,12 @@ some of the pages cached in the cgroup (page cache pages).
2624.2 Task migration 2624.2 Task migration
263 263
264When a task migrates from one cgroup to another, it's charge is not 264When a task migrates from one cgroup to another, it's charge is not
265carried forward. The pages allocated from the original cgroup still 265carried forward by default. The pages allocated from the original cgroup still
266remain charged to it, the charge is dropped when the page is freed or 266remain charged to it, the charge is dropped when the page is freed or
267reclaimed. 267reclaimed.
268 268
269Note: You can move charges of a task along with task migration. See 8.
270
2694.3 Removing a cgroup 2714.3 Removing a cgroup
270 272
271A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a 273A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
@@ -414,7 +416,57 @@ NOTE1: Soft limits take effect over a long period of time, since they involve
414NOTE2: It is recommended to set the soft limit always below the hard limit, 416NOTE2: It is recommended to set the soft limit always below the hard limit,
415 otherwise the hard limit will take precedence. 417 otherwise the hard limit will take precedence.
416 418
4178. TODO 4198. Move charges at task migration
420
421Users can move charges associated with a task along with task migration, that
422is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
423
4248.1 Interface
425
426This feature is disabled by default. It can be enabled(and disabled again) by
427writing to memory.move_charge_at_immigrate of the destination cgroup.
428
429If you want to enable it:
430
431# echo (some positive value) > memory.move_charge_at_immigrate
432
433Note: Each bit of move_charge_at_immigrate has its own meaning about what type
434 of charges should be moved. See 8.2 for details.
435Note: Charges are moved only when you move mm->owner, IOW, a leader of a thread
436 group.
437Note: If we cannot find enough space for the task in the destination cgroup, we
438 try to make space by reclaiming memory. Task migration may fail if we
439 cannot make enough space.
440Note: It can take several seconds if you move charges on the order of gigabytes.
441
442And if you want to disable it again:
443
444# echo 0 > memory.move_charge_at_immigrate
445
4468.2 Type of charges which can be moved
447
448Each bit of move_charge_at_immigrate has its own meaning about what type of
449charges should be moved.
450
451 bit | what type of charges would be moved ?
452 -----+------------------------------------------------------------------------
453 0 | A charge of an anonymous page(or swap of it) used by the target task.
454 | Those pages and swaps must be used only by the target task. You must
455 | enable Swap Extension(see 2.4) to enable move of swap charges.
456
457Note: Those pages and swaps must be charged to the old cgroup.
458Note: More types of pages (e.g. file cache, shmem) will be supported by other
459 bits in future.
460
4618.3 TODO
462
463- Add support for other types of pages(e.g. file cache, shmem, etc.).
464- Implement madvise(2) to let users decide the vma to be moved or not to be
465 moved.
466- All charge-moving operations are done under cgroup_mutex. It's not good
467 behavior to hold the mutex too long, so we may need some trick.
468
4699. TODO
418 470
4191. Add support for accounting huge pages (as a separate controller) 4711. Add support for accounting huge pages (as a separate controller)
4202. Make per-cgroup scanner reclaim not-shared pages first 4722. Make per-cgroup scanner reclaim not-shared pages first
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d813823ab08f..59ffaf511d77 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -226,11 +226,26 @@ struct mem_cgroup {
226 bool memsw_is_minimum; 226 bool memsw_is_minimum;
227 227
228 /* 228 /*
229 * Should we move charges of a task when a task is moved into this
230 * mem_cgroup ? And what type of charges should we move ?
231 */
232 unsigned long move_charge_at_immigrate;
233
234 /*
229 * statistics. This must be placed at the end of memcg. 235 * statistics. This must be placed at the end of memcg.
230 */ 236 */
231 struct mem_cgroup_stat stat; 237 struct mem_cgroup_stat stat;
232}; 238};
233 239
240/* Stuffs for move charges at task migration. */
241/*
242 * Types of charges to be moved. "move_charge_at_immigrate" is treated as a
243 * left-shifted bitmap of these types.
244 */
245enum move_type {
246 NR_MOVE_TYPE,
247};
248
234/* 249/*
235 * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft 250 * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
236 * limit reclaim to prevent infinite loops, if they ever occur. 251 * limit reclaim to prevent infinite loops, if they ever occur.
@@ -2865,6 +2880,31 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
2865 return 0; 2880 return 0;
2866} 2881}
2867 2882
2883static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
2884 struct cftype *cft)
2885{
2886 return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate;
2887}
2888
2889static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
2890 struct cftype *cft, u64 val)
2891{
2892 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
2893
2894 if (val >= (1 << NR_MOVE_TYPE))
2895 return -EINVAL;
2896 /*
2897 * We check this value several times in both can_attach() and
2898 * attach(), so we need cgroup lock to prevent this value from being
2899 * inconsistent.
2900 */
2901 cgroup_lock();
2902 mem->move_charge_at_immigrate = val;
2903 cgroup_unlock();
2904
2905 return 0;
2906}
2907
2868 2908
2869/* For read statistics */ 2909/* For read statistics */
2870enum { 2910enum {
@@ -3098,6 +3138,11 @@ static struct cftype mem_cgroup_files[] = {
3098 .read_u64 = mem_cgroup_swappiness_read, 3138 .read_u64 = mem_cgroup_swappiness_read,
3099 .write_u64 = mem_cgroup_swappiness_write, 3139 .write_u64 = mem_cgroup_swappiness_write,
3100 }, 3140 },
3141 {
3142 .name = "move_charge_at_immigrate",
3143 .read_u64 = mem_cgroup_move_charge_read,
3144 .write_u64 = mem_cgroup_move_charge_write,
3145 },
3101}; 3146};
3102 3147
3103#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 3148#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -3345,6 +3390,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
3345 if (parent) 3390 if (parent)
3346 mem->swappiness = get_swappiness(parent); 3391 mem->swappiness = get_swappiness(parent);
3347 atomic_set(&mem->refcnt, 1); 3392 atomic_set(&mem->refcnt, 1);
3393 mem->move_charge_at_immigrate = 0;
3348 return &mem->css; 3394 return &mem->css;
3349free_out: 3395free_out:
3350 __mem_cgroup_free(mem); 3396 __mem_cgroup_free(mem);
@@ -3381,16 +3427,57 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
3381 return ret; 3427 return ret;
3382} 3428}
3383 3429
3430/* Handlers for move charge at task migration. */
3431static int mem_cgroup_can_move_charge(void)
3432{
3433 return 0;
3434}
3435
3436static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
3437 struct cgroup *cgroup,
3438 struct task_struct *p,
3439 bool threadgroup)
3440{
3441 int ret = 0;
3442 struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup);
3443
3444 if (mem->move_charge_at_immigrate) {
3445 struct mm_struct *mm;
3446 struct mem_cgroup *from = mem_cgroup_from_task(p);
3447
3448 VM_BUG_ON(from == mem);
3449
3450 mm = get_task_mm(p);
3451 if (!mm)
3452 return 0;
3453
3454 /* We move charges only when we move the owner of the mm */
3455 if (mm->owner == p)
3456 ret = mem_cgroup_can_move_charge();
3457
3458 mmput(mm);
3459 }
3460 return ret;
3461}
3462
3463static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
3464 struct cgroup *cgroup,
3465 struct task_struct *p,
3466 bool threadgroup)
3467{
3468}
3469
3470static void mem_cgroup_move_charge(void)
3471{
3472}
3473
3384static void mem_cgroup_move_task(struct cgroup_subsys *ss, 3474static void mem_cgroup_move_task(struct cgroup_subsys *ss,
3385 struct cgroup *cont, 3475 struct cgroup *cont,
3386 struct cgroup *old_cont, 3476 struct cgroup *old_cont,
3387 struct task_struct *p, 3477 struct task_struct *p,
3388 bool threadgroup) 3478 bool threadgroup)
3389{ 3479{
3390 /* 3480 mem_cgroup_move_charge();
3391 * FIXME: It's better to move charges of this process from old
3392 * memcg to new memcg. But it's just on TODO-List now.
3393 */
3394} 3481}
3395 3482
3396struct cgroup_subsys mem_cgroup_subsys = { 3483struct cgroup_subsys mem_cgroup_subsys = {
@@ -3400,6 +3487,8 @@ struct cgroup_subsys mem_cgroup_subsys = {
3400 .pre_destroy = mem_cgroup_pre_destroy, 3487 .pre_destroy = mem_cgroup_pre_destroy,
3401 .destroy = mem_cgroup_destroy, 3488 .destroy = mem_cgroup_destroy,
3402 .populate = mem_cgroup_populate, 3489 .populate = mem_cgroup_populate,
3490 .can_attach = mem_cgroup_can_attach,
3491 .cancel_attach = mem_cgroup_cancel_attach,
3403 .attach = mem_cgroup_move_task, 3492 .attach = mem_cgroup_move_task,
3404 .early_init = 0, 3493 .early_init = 0,
3405 .use_id = 1, 3494 .use_id = 1,