 include/linux/cgroup.h | 16
 kernel/cgroup.c        | 93
 kernel/cpuset.c        | 63
 3 files changed, 75 insertions(+), 97 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0287fccd0f54..8472ed576b64 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -528,15 +528,6 @@ struct cftype_set {
 	struct cftype *cfts;
 };
 
-struct cgroup_scanner {
-	struct cgroup *cgrp;
-	int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
-	void (*process_task)(struct task_struct *p,
-			     struct cgroup_scanner *scan);
-	struct ptr_heap *heap;
-	void *data;
-};
-
 /*
  * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This
  * function can be called as long as @cgrp is accessible.
@@ -899,7 +890,12 @@ struct cgroup_task_iter {
 void cgroup_task_iter_start(struct cgroup *cgrp, struct cgroup_task_iter *it);
 struct task_struct *cgroup_task_iter_next(struct cgroup_task_iter *it);
 void cgroup_task_iter_end(struct cgroup_task_iter *it);
-int cgroup_scan_tasks(struct cgroup_scanner *scan);
+
+int cgroup_scan_tasks(struct cgroup *cgrp,
+		      bool (*test)(struct task_struct *, void *),
+		      void (*process)(struct task_struct *, void *),
+		      void *data, struct ptr_heap *heap);
+
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
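The header change above replaces the embedded-struct interface with explicit arguments. As a rough sketch of what the conversion means for a caller (names illustrative; note that the callback's own signature also changes, from taking a struct cgroup_scanner * back-pointer to taking the opaque void *data cookie):

	/* before: state bundled in a cgroup_scanner (struct removed above) */
	struct cgroup_scanner scan = {
		.cgrp		= cgrp,
		.test_task	= NULL,		/* select all tasks */
		.process_task	= my_process,	/* took (task, scan) */
		.heap		= NULL,
		.data		= my_arg,
	};
	ret = cgroup_scan_tasks(&scan);

	/* after: the same information as explicit arguments,
	 * with my_process now taking (task, data) */
	ret = cgroup_scan_tasks(cgrp, NULL, my_process, my_arg, NULL);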
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index abc62ea1303c..7b16ddb2569b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3343,32 +3343,37 @@ static inline int started_after(void *p1, void *p2)
 
 /**
  * cgroup_scan_tasks - iterate though all the tasks in a cgroup
- * @scan: struct cgroup_scanner containing arguments for the scan
+ * @cgrp: the cgroup to iterate tasks of
+ * @test: optional test callback
+ * @process: process callback
+ * @data: data passed to @test and @process
+ * @heap: optional pre-allocated heap used for task iteration
 *
- * Arguments include pointers to callback functions test_task() and
- * process_task().
- * Iterate through all the tasks in a cgroup, calling test_task() for each,
- * and if it returns true, call process_task() for it also.
- * The test_task pointer may be NULL, meaning always true (select all tasks).
- * Effectively duplicates cgroup_task_iter_{start,next,end}()
- * but does not lock css_set_lock for the call to process_task().
- * The struct cgroup_scanner may be embedded in any structure of the caller's
- * creation.
- * It is guaranteed that process_task() will act on every task that
- * is a member of the cgroup for the duration of this call. This
- * function may or may not call process_task() for tasks that exit
- * or move to a different cgroup during the call, or are forked or
- * move into the cgroup during the call.
+ * Iterate through all the tasks in a cgroup, calling @test for each, and
+ * if it returns %true, call @process for it also.
 *
- * Note that test_task() may be called with locks held, and may in some
- * situations be called multiple times for the same task, so it should
- * be cheap.
- * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been
- * pre-allocated and will be used for heap operations (and its "gt" member will
- * be overwritten), else a temporary heap will be used (allocation of which
- * may cause this function to fail).
+ * @test may be NULL, meaning always true (select all tasks), which
+ * effectively duplicates cgroup_task_iter_{start,next,end}() but does not
+ * lock css_set_lock for the call to @process.
+ *
+ * It is guaranteed that @process will act on every task that is a member
+ * of @cgrp for the duration of this call. This function may or may not
+ * call @process for tasks that exit or move to a different cgroup during
+ * the call, or are forked or move into the cgroup during the call.
+ *
+ * Note that @test may be called with locks held, and may in some
+ * situations be called multiple times for the same task, so it should be
+ * cheap.
+ *
+ * If @heap is non-NULL, a heap has been pre-allocated and will be used for
+ * heap operations (and its "gt" member will be overwritten), else a
+ * temporary heap will be used (allocation of which may cause this function
+ * to fail).
 */
-int cgroup_scan_tasks(struct cgroup_scanner *scan)
+int cgroup_scan_tasks(struct cgroup *cgrp,
+		      bool (*test)(struct task_struct *, void *),
+		      void (*process)(struct task_struct *, void *),
+		      void *data, struct ptr_heap *heap)
 {
 	int retval, i;
 	struct cgroup_task_iter it;
@@ -3376,12 +3381,10 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 	/* Never dereference latest_task, since it's not refcounted */
 	struct task_struct *latest_task = NULL;
 	struct ptr_heap tmp_heap;
-	struct ptr_heap *heap;
 	struct timespec latest_time = { 0, 0 };
 
-	if (scan->heap) {
+	if (heap) {
 		/* The caller supplied our heap and pre-allocated its memory */
-		heap = scan->heap;
 		heap->gt = &started_after;
 	} else {
 		/* We need to allocate our own heap memory */
@@ -3394,25 +3397,24 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 
 again:
 	/*
-	 * Scan tasks in the cgroup, using the scanner's "test_task" callback
-	 * to determine which are of interest, and using the scanner's
-	 * "process_task" callback to process any of them that need an update.
-	 * Since we don't want to hold any locks during the task updates,
-	 * gather tasks to be processed in a heap structure.
-	 * The heap is sorted by descending task start time.
-	 * If the statically-sized heap fills up, we overflow tasks that
-	 * started later, and in future iterations only consider tasks that
-	 * started after the latest task in the previous pass. This
+	 * Scan tasks in the cgroup, using the @test callback to determine
+	 * which are of interest, and invoking @process callback on the
+	 * ones which need an update. Since we don't want to hold any
+	 * locks during the task updates, gather tasks to be processed in a
+	 * heap structure. The heap is sorted by descending task start
+	 * time. If the statically-sized heap fills up, we overflow tasks
+	 * that started later, and in future iterations only consider tasks
+	 * that started after the latest task in the previous pass. This
 	 * guarantees forward progress and that we don't miss any tasks.
 	 */
 	heap->size = 0;
-	cgroup_task_iter_start(scan->cgrp, &it);
+	cgroup_task_iter_start(cgrp, &it);
 	while ((p = cgroup_task_iter_next(&it))) {
 		/*
 		 * Only affect tasks that qualify per the caller's callback,
 		 * if he provided one
 		 */
-		if (scan->test_task && !scan->test_task(p, scan))
+		if (test && !test(p, data))
 			continue;
 		/*
 		 * Only process tasks that started after the last task
@@ -3450,7 +3452,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 			latest_task = q;
 		}
 		/* Process the task per the caller's callback */
-		scan->process_task(q, scan);
+		process(q, data);
 		put_task_struct(q);
 	}
 	/*
@@ -3467,10 +3469,9 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 	return 0;
 }
 
-static void cgroup_transfer_one_task(struct task_struct *task,
-				     struct cgroup_scanner *scan)
+static void cgroup_transfer_one_task(struct task_struct *task, void *data)
 {
-	struct cgroup *new_cgroup = scan->data;
+	struct cgroup *new_cgroup = data;
 
 	mutex_lock(&cgroup_mutex);
 	cgroup_attach_task(new_cgroup, task, false);
@@ -3484,15 +3485,7 @@ static void cgroup_transfer_one_task(struct task_struct *task,
 */
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 {
-	struct cgroup_scanner scan;
-
-	scan.cgrp = from;
-	scan.test_task = NULL; /* select all tasks in cgroup */
-	scan.process_task = cgroup_transfer_one_task;
-	scan.heap = NULL;
-	scan.data = to;
-
-	return cgroup_scan_tasks(&scan);
+	return cgroup_scan_tasks(from, NULL, cgroup_transfer_one_task, to, NULL);
 }
 
 /*
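None of the call sites converted in this patch passes a @test callback. As a hedged illustration of the filtering side of the new interface, a hypothetical caller that only reports real-time tasks could look like the following (the function names and RT filter are invented for the example, not part of this patch):

	/* hypothetical: log every RT task in @cgrp */
	static bool rt_test(struct task_struct *p, void *data)
	{
		/* may run under locks and more than once per task, so keep it cheap */
		return task_has_rt_policy(p);
	}

	static void rt_process(struct task_struct *p, void *data)
	{
		pr_info("RT task %s[%d]\n", p->comm, p->pid);
	}

	/* NULL @heap: cgroup_scan_tasks() allocates a temporary heap itself */
	ret = cgroup_scan_tasks(cgrp, rt_test, rt_process, NULL, NULL);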
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index be4f5036ea5e..6fe23f2ac742 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -830,7 +830,7 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs)
 /**
  * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's
  * @tsk: task to test
- * @scan: struct cgroup_scanner containing the cgroup of the task
+ * @data: cpuset @tsk belongs to
 *
 * Called by cgroup_scan_tasks() for each task in a cgroup whose
 * cpus_allowed mask needs to be changed.
@@ -838,12 +838,11 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs)
 * We don't need to re-check for the cgroup/cpuset membership, since we're
 * holding cpuset_mutex at this point.
 */
-static void cpuset_change_cpumask(struct task_struct *tsk,
-				  struct cgroup_scanner *scan)
+static void cpuset_change_cpumask(struct task_struct *tsk, void *data)
 {
-	struct cpuset *cpus_cs;
+	struct cpuset *cs = data;
+	struct cpuset *cpus_cs = effective_cpumask_cpuset(cs);
 
-	cpus_cs = effective_cpumask_cpuset(cgroup_cs(scan->cgrp));
 	set_cpus_allowed_ptr(tsk, cpus_cs->cpus_allowed);
 }
 
@@ -862,13 +861,8 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
 */
 static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
 {
-	struct cgroup_scanner scan;
-
-	scan.cgrp = cs->css.cgroup;
-	scan.test_task = NULL;
-	scan.process_task = cpuset_change_cpumask;
-	scan.heap = heap;
-	cgroup_scan_tasks(&scan);
+	cgroup_scan_tasks(cs->css.cgroup, NULL, cpuset_change_cpumask, cs,
+			  heap);
 }
 
 /*
@@ -1052,20 +1046,24 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 	task_unlock(tsk);
 }
 
+struct cpuset_change_nodemask_arg {
+	struct cpuset *cs;
+	nodemask_t *newmems;
+};
+
 /*
 * Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy
 * of it to cpuset's new mems_allowed, and migrate pages to new nodes if
 * memory_migrate flag is set. Called with cpuset_mutex held.
 */
-static void cpuset_change_nodemask(struct task_struct *p,
-				   struct cgroup_scanner *scan)
+static void cpuset_change_nodemask(struct task_struct *p, void *data)
 {
-	struct cpuset *cs = cgroup_cs(scan->cgrp);
+	struct cpuset_change_nodemask_arg *arg = data;
+	struct cpuset *cs = arg->cs;
 	struct mm_struct *mm;
 	int migrate;
-	nodemask_t *newmems = scan->data;
 
-	cpuset_change_task_nodemask(p, newmems);
+	cpuset_change_task_nodemask(p, arg->newmems);
 
 	mm = get_task_mm(p);
 	if (!mm)
@@ -1075,7 +1073,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
 
 	mpol_rebind_mm(mm, &cs->mems_allowed);
 	if (migrate)
-		cpuset_migrate_mm(mm, &cs->old_mems_allowed, newmems);
+		cpuset_migrate_mm(mm, &cs->old_mems_allowed, arg->newmems);
 	mmput(mm);
 }
 
@@ -1093,19 +1091,14 @@ static void *cpuset_being_rebound;
 static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
 {
 	static nodemask_t newmems;	/* protected by cpuset_mutex */
-	struct cgroup_scanner scan;
 	struct cpuset *mems_cs = effective_nodemask_cpuset(cs);
+	struct cpuset_change_nodemask_arg arg = { .cs = cs,
+						  .newmems = &newmems };
 
 	cpuset_being_rebound = cs;	/* causes mpol_dup() rebind */
 
 	guarantee_online_mems(mems_cs, &newmems);
 
-	scan.cgrp = cs->css.cgroup;
-	scan.test_task = NULL;
-	scan.process_task = cpuset_change_nodemask;
-	scan.heap = heap;
-	scan.data = &newmems;
-
 	/*
 	 * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
 	 * take while holding tasklist_lock. Forks can happen - the
@@ -1116,7 +1109,8 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
 	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
 	 * is idempotent. Also migrate pages in each mm to new nodes.
 	 */
-	cgroup_scan_tasks(&scan);
+	cgroup_scan_tasks(cs->css.cgroup, NULL, cpuset_change_nodemask, &arg,
+			  heap);
 
 	/*
 	 * All the tasks' nodemasks have been updated, update
@@ -1263,17 +1257,18 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 /*
 * cpuset_change_flag - make a task's spread flags the same as its cpuset's
 * @tsk: task to be updated
- * @scan: struct cgroup_scanner containing the cgroup of the task
+ * @data: cpuset @tsk belongs to
 *
 * Called by cgroup_scan_tasks() for each task in a cgroup.
 *
 * We don't need to re-check for the cgroup/cpuset membership, since we're
 * holding cpuset_mutex at this point.
 */
-static void cpuset_change_flag(struct task_struct *tsk,
-			       struct cgroup_scanner *scan)
+static void cpuset_change_flag(struct task_struct *tsk, void *data)
 {
-	cpuset_update_task_spread_flag(cgroup_cs(scan->cgrp), tsk);
+	struct cpuset *cs = data;
+
+	cpuset_update_task_spread_flag(cs, tsk);
 }
 
@@ -1291,13 +1286,7 @@ static void cpuset_change_flag(struct task_struct *tsk,
 */
 static void update_tasks_flags(struct cpuset *cs, struct ptr_heap *heap)
 {
-	struct cgroup_scanner scan;
-
-	scan.cgrp = cs->css.cgroup;
-	scan.test_task = NULL;
-	scan.process_task = cpuset_change_flag;
-	scan.heap = heap;
-	cgroup_scan_tasks(&scan);
+	cgroup_scan_tasks(cs->css.cgroup, NULL, cpuset_change_flag, cs, heap);
 }
 
 /*
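When a callback needs more than one value, the single @data cookie still suffices: bundle the values in a caller-stack struct and pass its address, as cpuset_change_nodemask_arg does above. A generic sketch of the same idiom (all names hypothetical):

	/* hypothetical carrier struct, mirroring cpuset_change_nodemask_arg */
	struct my_scan_arg {
		struct cpuset *cs;
		nodemask_t *newmems;
	};

	static void my_process(struct task_struct *p, void *data)
	{
		struct my_scan_arg *arg = data;
		/* ... use arg->cs and arg->newmems against @p ... */
	}

	static void my_update(struct cpuset *cs, struct ptr_heap *heap)
	{
		struct my_scan_arg arg = { .cs = cs, .newmems = NULL };

		/* arg lives on the stack for the duration of the scan */
		cgroup_scan_tasks(cs->css.cgroup, NULL, my_process, &arg, heap);
	}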