author     Tejun Heo <tj@kernel.org>    2013-03-19 16:45:20 -0400
committer  Tejun Heo <tj@kernel.org>    2013-03-19 16:45:20 -0400
commit     14a40ffccd6163bbcd1d6f32b28a88ffe6149fc6
tree       eb61e5bf7b64c3e67f3e33fe6b07fde4ee1d4d43
parent     2e109a2855bf6cf675a8b74dbd89b6492e8def42
sched: replace PF_THREAD_BOUND with PF_NO_SETAFFINITY
PF_THREAD_BOUND was originally used to mark kernel threads which were
bound to a specific CPU using kthread_bind(); a task with the flag set
allows cpus_allowed modifications only by the task itself.  Workqueue
is currently abusing it to prevent userland from meddling with the
cpus_allowed of workqueue workers.
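For reference, the usual in-kernel pattern that ends up setting this
flag is kthread_create() followed by kthread_bind().  A minimal sketch
(my_thread_fn, start_bound_thread and the thread name are illustrative
placeholders, not code from this patch):

	#include <linux/kthread.h>
	#include <linux/sched.h>
	#include <linux/err.h>

	static int my_thread_fn(void *data)
	{
		/* idle loop; park here until someone stops the thread */
		while (!kthread_should_stop()) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
		}
		return 0;
	}

	static struct task_struct *start_bound_thread(unsigned int cpu)
	{
		struct task_struct *t;

		t = kthread_create(my_thread_fn, NULL, "my_bound/%u", cpu);
		if (IS_ERR(t))
			return t;
		/* __kthread_bind() pins the task to @cpu and sets the flag */
		kthread_bind(t, cpu);
		wake_up_process(t);
		return t;
	}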
What we need is a flag to prevent userland from messing with the
cpus_allowed of certain kernel tasks.  In kernel, anyone can
(incorrectly) squash the flag, and, for worker-type usages,
restricting cpus_allowed modification to the task itself doesn't
provide meaningful extra protection as other tasks can inject work
items into the task anyway.
This patch replaces PF_THREAD_BOUND with PF_NO_SETAFFINITY.
sched_setaffinity() checks the flag and returns -EINVAL if it is set.
set_cpus_allowed_ptr() is no longer affected by the flag.
This will allow simplifying workqueue worker CPU affinity management.
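To illustrate the new behavior from userland: after this change, an
attempt to change the affinity of a task with the flag set (e.g. a
workqueue worker) fails with EINVAL.  A hypothetical test program, not
part of the patch:

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <stdlib.h>

	int main(int argc, char **argv)
	{
		cpu_set_t mask;

		if (argc < 2)
			return 1;

		CPU_ZERO(&mask);
		CPU_SET(0, &mask);

		/* argv[1]: pid of e.g. a kworker; now fails with EINVAL */
		if (sched_setaffinity(atoi(argv[1]), sizeof(mask), &mask) < 0)
			perror("sched_setaffinity");  /* "Invalid argument" */
		return 0;
	}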
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
 include/linux/sched.h |  2 +-
 kernel/cgroup.c       |  4 ++--
 kernel/cpuset.c       | 16 ++++++++--------
 kernel/kthread.c      |  2 +-
 kernel/sched/core.c   |  9 ++++-----
 kernel/workqueue.c    | 10 +++-------
 6 files changed, 19 insertions(+), 24 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d35d2b6ddbfb..e5c64f7b8c1d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1793,7 +1793,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
 #define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
 #define PF_SPREAD_SLAB	0x02000000	/* Spread some slab caches over cpuset */
-#define PF_THREAD_BOUND	0x04000000	/* Thread bound to specific cpu */
+#define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
 #define PF_MCE_EARLY    0x08000000	/* Early kill for mce process policy */
 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a32f9432666c..3852d926322c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2224,11 +2224,11 @@ retry_find_task:
 		tsk = tsk->group_leader;
 
 	/*
-	 * Workqueue threads may acquire PF_THREAD_BOUND and become
+	 * Workqueue threads may acquire PF_NO_SETAFFINITY and become
 	 * trapped in a cpuset, or RT worker may be born in a cgroup
 	 * with no rt_runtime allocated.  Just say no.
 	 */
-	if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) {
+	if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
 		ret = -EINVAL;
 		rcu_read_unlock();
 		goto out_unlock_cgroup;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4f9dfe43ecbd..f22e94792707 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1388,16 +1388,16 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 
 	cgroup_taskset_for_each(task, cgrp, tset) {
 		/*
-		 * Kthreads bound to specific cpus cannot be moved to a new
-		 * cpuset; we cannot change their cpu affinity and
-		 * isolating such threads by their set of allowed nodes is
-		 * unnecessary.  Thus, cpusets are not applicable for such
-		 * threads.  This prevents checking for success of
-		 * set_cpus_allowed_ptr() on all attached tasks before
-		 * cpus_allowed may be changed.
+		 * Kthreads which disallow setaffinity shouldn't be moved
+		 * to a new cpuset; we don't want to change their cpu
+		 * affinity and isolating such threads by their set of
+		 * allowed nodes is unnecessary.  Thus, cpusets are not
+		 * applicable for such threads.  This prevents checking for
+		 * success of set_cpus_allowed_ptr() on all attached tasks
+		 * before cpus_allowed may be changed.
 		 */
 		ret = -EINVAL;
-		if (task->flags & PF_THREAD_BOUND)
+		if (task->flags & PF_NO_SETAFFINITY)
 			goto out_unlock;
 		ret = security_task_setscheduler(task);
 		if (ret)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 691dc2ef9baf..a2fbbb782bad 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -260,7 +260,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu)
 {
 	/* It's safe because the task is inactive. */
 	do_set_cpus_allowed(p, cpumask_of(cpu));
-	p->flags |= PF_THREAD_BOUND;
+	p->flags |= PF_NO_SETAFFINITY;
 }
 
 /**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f12624a393c..23606ee961b5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4126,6 +4126,10 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	get_task_struct(p);
 	rcu_read_unlock();
 
+	if (p->flags & PF_NO_SETAFFINITY) {
+		retval = -EINVAL;
+		goto out_put_task;
+	}
 	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
 		retval = -ENOMEM;
 		goto out_put_task;
@@ -4773,11 +4777,6 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 		goto out;
 	}
 
-	if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	do_set_cpus_allowed(p, new_mask);
 
 	/* Can the task run on the task's current CPU? If so, we're done */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 969be0b72071..39a591f65b08 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1757,12 +1757,8 @@ static struct worker *create_worker(struct worker_pool *pool)
 	set_user_nice(worker->task, pool->attrs->nice);
 	set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
 
-	/*
-	 * %PF_THREAD_BOUND is used to prevent userland from meddling with
-	 * cpumask of workqueue workers.  This is an abuse.  We need
-	 * %PF_NO_SETAFFINITY.
-	 */
-	worker->task->flags |= PF_THREAD_BOUND;
+	/* prevent userland from meddling with cpumask of workqueue workers */
+	worker->task->flags |= PF_NO_SETAFFINITY;
 
 	/*
 	 * The caller is responsible for ensuring %POOL_DISASSOCIATED
@@ -3876,7 +3872,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	}
 
 	wq->rescuer = rescuer;
-	rescuer->task->flags |= PF_THREAD_BOUND;
+	rescuer->task->flags |= PF_NO_SETAFFINITY;
 	wake_up_process(rescuer->task);
 }
 