 include/linux/cgroup.h | 16
 kernel/cgroup.c        | 93
 kernel/cpuset.c        | 63
 3 files changed, 75 insertions(+), 97 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0287fccd0f54..8472ed576b64 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -528,15 +528,6 @@ struct cftype_set {
 	struct cftype *cfts;
 };
 
-struct cgroup_scanner {
-	struct cgroup *cgrp;
-	int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
-	void (*process_task)(struct task_struct *p,
-			     struct cgroup_scanner *scan);
-	struct ptr_heap *heap;
-	void *data;
-};
-
 /*
  * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This
  * function can be called as long as @cgrp is accessible.
@@ -899,7 +890,12 @@ struct cgroup_task_iter {
 void cgroup_task_iter_start(struct cgroup *cgrp, struct cgroup_task_iter *it);
 struct task_struct *cgroup_task_iter_next(struct cgroup_task_iter *it);
 void cgroup_task_iter_end(struct cgroup_task_iter *it);
-int cgroup_scan_tasks(struct cgroup_scanner *scan);
+
+int cgroup_scan_tasks(struct cgroup *cgrp,
+		      bool (*test)(struct task_struct *, void *),
+		      void (*process)(struct task_struct *, void *),
+		      void *data, struct ptr_heap *heap);
+
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
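For context, a minimal sketch of how a caller might drive the new interface; my_test(), my_update(), struct my_config and my_update_all_tasks() are illustrative placeholders, not symbols introduced by this patch:

/* Hypothetical caller of the new cgroup_scan_tasks() signature. */
static bool my_test(struct task_struct *p, void *data)
{
	struct my_config *conf = data;

	/* select only tasks that still need an update (hypothetical check) */
	return !my_task_is_current(p, conf);
}

static void my_update(struct task_struct *p, void *data)
{
	my_apply_config(p, data);	/* hypothetical per-task update */
}

static int my_update_all_tasks(struct cgroup *cgrp, struct my_config *conf,
			       struct ptr_heap *heap)
{
	/*
	 * Tasks rejected by my_test() are skipped; the rest get my_update()
	 * called on them without css_set_lock held.  Passing a NULL heap
	 * would make cgroup_scan_tasks() allocate a temporary one, which
	 * may fail.
	 */
	return cgroup_scan_tasks(cgrp, my_test, my_update, conf, heap);
}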
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index abc62ea1303c..7b16ddb2569b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3343,32 +3343,37 @@ static inline int started_after(void *p1, void *p2)
 
 /**
  * cgroup_scan_tasks - iterate though all the tasks in a cgroup
- * @scan: struct cgroup_scanner containing arguments for the scan
+ * @cgrp: the cgroup to iterate tasks of
+ * @test: optional test callback
+ * @process: process callback
+ * @data: data passed to @test and @process
+ * @heap: optional pre-allocated heap used for task iteration
  *
- * Arguments include pointers to callback functions test_task() and
- * process_task().
- * Iterate through all the tasks in a cgroup, calling test_task() for each,
- * and if it returns true, call process_task() for it also.
- * The test_task pointer may be NULL, meaning always true (select all tasks).
- * Effectively duplicates cgroup_task_iter_{start,next,end}()
- * but does not lock css_set_lock for the call to process_task().
- * The struct cgroup_scanner may be embedded in any structure of the caller's
- * creation.
- * It is guaranteed that process_task() will act on every task that
- * is a member of the cgroup for the duration of this call. This
- * function may or may not call process_task() for tasks that exit
- * or move to a different cgroup during the call, or are forked or
- * move into the cgroup during the call.
+ * Iterate through all the tasks in a cgroup, calling @test for each, and
+ * if it returns %true, call @process for it also.
  *
- * Note that test_task() may be called with locks held, and may in some
- * situations be called multiple times for the same task, so it should
- * be cheap.
- * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been
- * pre-allocated and will be used for heap operations (and its "gt" member will
- * be overwritten), else a temporary heap will be used (allocation of which
- * may cause this function to fail).
+ * @test may be NULL, meaning always true (select all tasks), which
+ * effectively duplicates cgroup_task_iter_{start,next,end}() but does not
+ * lock css_set_lock for the call to @process.
+ *
+ * It is guaranteed that @process will act on every task that is a member
+ * of @cgrp for the duration of this call. This function may or may not
+ * call @process for tasks that exit or move to a different cgroup during
+ * the call, or are forked or move into the cgroup during the call.
+ *
+ * Note that @test may be called with locks held, and may in some
+ * situations be called multiple times for the same task, so it should be
+ * cheap.
+ *
+ * If @heap is non-NULL, a heap has been pre-allocated and will be used for
+ * heap operations (and its "gt" member will be overwritten), else a
+ * temporary heap will be used (allocation of which may cause this function
+ * to fail).
  */
-int cgroup_scan_tasks(struct cgroup_scanner *scan)
+int cgroup_scan_tasks(struct cgroup *cgrp,
+		      bool (*test)(struct task_struct *, void *),
+		      void (*process)(struct task_struct *, void *),
+		      void *data, struct ptr_heap *heap)
 {
 	int retval, i;
 	struct cgroup_task_iter it;
@@ -3376,12 +3381,10 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 	/* Never dereference latest_task, since it's not refcounted */
 	struct task_struct *latest_task = NULL;
 	struct ptr_heap tmp_heap;
-	struct ptr_heap *heap;
 	struct timespec latest_time = { 0, 0 };
 
-	if (scan->heap) {
+	if (heap) {
 		/* The caller supplied our heap and pre-allocated its memory */
-		heap = scan->heap;
 		heap->gt = &started_after;
 	} else {
 		/* We need to allocate our own heap memory */
@@ -3394,25 +3397,24 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 
  again:
 	/*
-	 * Scan tasks in the cgroup, using the scanner's "test_task" callback
-	 * to determine which are of interest, and using the scanner's
-	 * "process_task" callback to process any of them that need an update.
-	 * Since we don't want to hold any locks during the task updates,
-	 * gather tasks to be processed in a heap structure.
-	 * The heap is sorted by descending task start time.
-	 * If the statically-sized heap fills up, we overflow tasks that
-	 * started later, and in future iterations only consider tasks that
-	 * started after the latest task in the previous pass. This
+	 * Scan tasks in the cgroup, using the @test callback to determine
+	 * which are of interest, and invoking @process callback on the
+	 * ones which need an update. Since we don't want to hold any
+	 * locks during the task updates, gather tasks to be processed in a
+	 * heap structure. The heap is sorted by descending task start
+	 * time. If the statically-sized heap fills up, we overflow tasks
+	 * that started later, and in future iterations only consider tasks
+	 * that started after the latest task in the previous pass. This
 	 * guarantees forward progress and that we don't miss any tasks.
 	 */
 	heap->size = 0;
-	cgroup_task_iter_start(scan->cgrp, &it);
+	cgroup_task_iter_start(cgrp, &it);
 	while ((p = cgroup_task_iter_next(&it))) {
 		/*
 		 * Only affect tasks that qualify per the caller's callback,
 		 * if he provided one
 		 */
-		if (scan->test_task && !scan->test_task(p, scan))
+		if (test && !test(p, data))
 			continue;
 		/*
 		 * Only process tasks that started after the last task
@@ -3450,7 +3452,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 			latest_task = q;
 		}
 		/* Process the task per the caller's callback */
-		scan->process_task(q, scan);
+		process(q, data);
 		put_task_struct(q);
 	}
 	/*
@@ -3467,10 +3469,9 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 	return 0;
 }
 
-static void cgroup_transfer_one_task(struct task_struct *task,
-				     struct cgroup_scanner *scan)
+static void cgroup_transfer_one_task(struct task_struct *task, void *data)
 {
-	struct cgroup *new_cgroup = scan->data;
+	struct cgroup *new_cgroup = data;
 
 	mutex_lock(&cgroup_mutex);
 	cgroup_attach_task(new_cgroup, task, false);
@@ -3484,15 +3485,7 @@ static void cgroup_transfer_one_task(struct task_struct *task,
  */
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 {
-	struct cgroup_scanner scan;
-
-	scan.cgrp = from;
-	scan.test_task = NULL; /* select all tasks in cgroup */
-	scan.process_task = cgroup_transfer_one_task;
-	scan.heap = NULL;
-	scan.data = to;
-
-	return cgroup_scan_tasks(&scan);
+	return cgroup_scan_tasks(from, NULL, cgroup_transfer_one_task, to, NULL);
 }
 
 /*
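As the updated kernel-doc notes, a NULL @test selects every task, so a scan like the one in cgroup_transfer_tasks() behaves roughly like a plain cgroup_task_iter walk; the difference is that cgroup_scan_tasks() batches tasks in the heap so @process runs without css_set_lock held. A rough sketch of that equivalence, ignoring the batching and the forward-progress handling:

	struct cgroup_task_iter it;
	struct task_struct *p;

	cgroup_task_iter_start(cgrp, &it);
	while ((p = cgroup_task_iter_next(&it)))
		process(p, data);	/* cgroup_scan_tasks() defers this
					 * until css_set_lock is dropped */
	cgroup_task_iter_end(&it);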
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index be4f5036ea5e..6fe23f2ac742 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -830,7 +830,7 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs)
 /**
  * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's
  * @tsk: task to test
- * @scan: struct cgroup_scanner containing the cgroup of the task
+ * @data: cpuset to @tsk belongs to
  *
  * Called by cgroup_scan_tasks() for each task in a cgroup whose
  * cpus_allowed mask needs to be changed.
@@ -838,12 +838,11 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs)
  * We don't need to re-check for the cgroup/cpuset membership, since we're
  * holding cpuset_mutex at this point.
  */
-static void cpuset_change_cpumask(struct task_struct *tsk,
-				  struct cgroup_scanner *scan)
+static void cpuset_change_cpumask(struct task_struct *tsk, void *data)
 {
-	struct cpuset *cpus_cs;
+	struct cpuset *cs = data;
+	struct cpuset *cpus_cs = effective_cpumask_cpuset(cs);
 
-	cpus_cs = effective_cpumask_cpuset(cgroup_cs(scan->cgrp));
 	set_cpus_allowed_ptr(tsk, cpus_cs->cpus_allowed);
 }
 
@@ -862,13 +861,8 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
  */
 static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
 {
-	struct cgroup_scanner scan;
-
-	scan.cgrp = cs->css.cgroup;
-	scan.test_task = NULL;
-	scan.process_task = cpuset_change_cpumask;
-	scan.heap = heap;
-	cgroup_scan_tasks(&scan);
+	cgroup_scan_tasks(cs->css.cgroup, NULL, cpuset_change_cpumask, cs,
+			  heap);
 }
 
 /*
@@ -1052,20 +1046,24 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 	task_unlock(tsk);
 }
 
+struct cpuset_change_nodemask_arg {
+	struct cpuset *cs;
+	nodemask_t *newmems;
+};
+
 /*
  * Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy
  * of it to cpuset's new mems_allowed, and migrate pages to new nodes if
  * memory_migrate flag is set. Called with cpuset_mutex held.
  */
-static void cpuset_change_nodemask(struct task_struct *p,
-				   struct cgroup_scanner *scan)
+static void cpuset_change_nodemask(struct task_struct *p, void *data)
 {
-	struct cpuset *cs = cgroup_cs(scan->cgrp);
+	struct cpuset_change_nodemask_arg *arg = data;
+	struct cpuset *cs = arg->cs;
 	struct mm_struct *mm;
 	int migrate;
-	nodemask_t *newmems = scan->data;
 
-	cpuset_change_task_nodemask(p, newmems);
+	cpuset_change_task_nodemask(p, arg->newmems);
 
 	mm = get_task_mm(p);
 	if (!mm)
@@ -1075,7 +1073,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
 
 	mpol_rebind_mm(mm, &cs->mems_allowed);
 	if (migrate)
-		cpuset_migrate_mm(mm, &cs->old_mems_allowed, newmems);
+		cpuset_migrate_mm(mm, &cs->old_mems_allowed, arg->newmems);
 	mmput(mm);
 }
 
@@ -1093,19 +1091,14 @@ static void *cpuset_being_rebound;
 static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
 {
 	static nodemask_t newmems;	/* protected by cpuset_mutex */
-	struct cgroup_scanner scan;
 	struct cpuset *mems_cs = effective_nodemask_cpuset(cs);
+	struct cpuset_change_nodemask_arg arg = { .cs = cs,
+						  .newmems = &newmems };
 
 	cpuset_being_rebound = cs;	/* causes mpol_dup() rebind */
 
 	guarantee_online_mems(mems_cs, &newmems);
 
-	scan.cgrp = cs->css.cgroup;
-	scan.test_task = NULL;
-	scan.process_task = cpuset_change_nodemask;
-	scan.heap = heap;
-	scan.data = &newmems;
-
 	/*
 	 * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
 	 * take while holding tasklist_lock. Forks can happen - the
@@ -1116,7 +1109,8 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
 	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
 	 * is idempotent. Also migrate pages in each mm to new nodes.
 	 */
-	cgroup_scan_tasks(&scan);
+	cgroup_scan_tasks(cs->css.cgroup, NULL, cpuset_change_nodemask, &arg,
+			  heap);
 
 	/*
 	 * All the tasks' nodemasks have been updated, update
@@ -1263,17 +1257,18 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 /*
  * cpuset_change_flag - make a task's spread flags the same as its cpuset's
  * @tsk: task to be updated
- * @scan: struct cgroup_scanner containing the cgroup of the task
+ * @data: cpuset to @tsk belongs to
  *
  * Called by cgroup_scan_tasks() for each task in a cgroup.
  *
  * We don't need to re-check for the cgroup/cpuset membership, since we're
  * holding cpuset_mutex at this point.
  */
-static void cpuset_change_flag(struct task_struct *tsk,
-			       struct cgroup_scanner *scan)
+static void cpuset_change_flag(struct task_struct *tsk, void *data)
 {
-	cpuset_update_task_spread_flag(cgroup_cs(scan->cgrp), tsk);
+	struct cpuset *cs = data;
+
+	cpuset_update_task_spread_flag(cs, tsk);
 }
 
 /*
@@ -1291,13 +1286,7 @@ static void cpuset_change_flag(struct task_struct *tsk,
  */
 static void update_tasks_flags(struct cpuset *cs, struct ptr_heap *heap)
 {
-	struct cgroup_scanner scan;
-
-	scan.cgrp = cs->css.cgroup;
-	scan.test_task = NULL;
-	scan.process_task = cpuset_change_flag;
-	scan.heap = heap;
-	cgroup_scan_tasks(&scan);
+	cgroup_scan_tasks(cs->css.cgroup, NULL, cpuset_change_flag, cs, heap);
 }
 
 /*
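The @heap parameter lets a caller such as cpuset reuse one pre-allocated heap across several scans instead of having each cgroup_scan_tasks() call allocate its own. A sketch of that pattern, assuming the lib/prio_heap.c helpers heap_init()/heap_free(); the surrounding error handling is illustrative only:

	struct ptr_heap heap;
	int err;

	/* one page worth of task pointers; the "gt" comparator is
	 * overwritten by cgroup_scan_tasks() itself, so NULL is fine */
	err = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
	if (err)
		return err;

	cgroup_scan_tasks(cs->css.cgroup, NULL, cpuset_change_flag, cs, &heap);

	heap_free(&heap);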