author		Kirill Tkhai <ktkhai@parallels.com>	2014-08-20 05:47:42 -0400
committer	Ingo Molnar <mingo@kernel.org>	2014-08-20 08:53:00 -0400
commit		cca26e8009d1939a6a5bf0200d276fa26f03e536 (patch)
tree		049a9c789b8eceb324b9d4e151ac0b1a6fa5b650 /kernel
parent		da0c1e65b51a289540159663aa4b90ba2366bc21 (diff)
sched: Teach scheduler to understand TASK_ON_RQ_MIGRATING state
This is a new p->on_rq state which will be used to indicate that a task
is in the process of migrating between two RQs. It allows us to get rid
of double_rq_lock(), which we previously used to change the rq of a
queued task.

Let's consider an example. To move a task between src_rq and dst_rq we
do the following:

	raw_spin_lock(&src_rq->lock);
	/* p is a task which is queued on src_rq */
	p = ...;

	dequeue_task(src_rq, p, 0);
	p->on_rq = TASK_ON_RQ_MIGRATING;
	set_task_cpu(p, dst_cpu);
	raw_spin_unlock(&src_rq->lock);

	/*
	 * Both RQs are unlocked here.
	 * Task p is dequeued from src_rq
	 * but its on_rq value is not zero.
	 */

	raw_spin_lock(&dst_rq->lock);
	p->on_rq = TASK_ON_RQ_QUEUED;
	enqueue_task(dst_rq, p, 0);
	raw_spin_unlock(&dst_rq->lock);

While p->on_rq is TASK_ON_RQ_MIGRATING, the task is considered to be
"migrating", and parallel scheduler actions on it are not available to
other callers. A parallel caller spins until the migration is
completed. The unavailable actions are changing the task's CPU
affinity, changing its priority, etc.; in other words, all the
functionality which previously required taking task_rq(p)->lock.

To implement TASK_ON_RQ_MIGRATING support we primarily rely on the
following fact: most scheduler users (from which we are protecting a
migrating task) use task_rq_lock() and __task_rq_lock() to take the
lock of task_rq(p). These primitives already know that a task's cpu may
change, and they spin until they hold the lock of the right RQ. We add
one more condition to them, so they will also spin until the migration
is finished.

Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Turner <pjt@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1408528062.23412.88.camel@tkhai
Signed-off-by: Ingo Molnar <mingo@kernel.org>
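[Editor's note] The protocol above can be modeled outside the kernel. Below is
a minimal userspace sketch, not kernel code: the rq/task structures, the
pthread spinlock plumbing, and the model_*() helper names are all invented
here for illustration, standing in for raw_spinlock_t, task_struct,
set_task_cpu() and the real __task_rq_lock().

	/* Userspace model of the TASK_ON_RQ_MIGRATING protocol (illustrative only). */
	#include <pthread.h>
	#include <sched.h>
	#include <stdatomic.h>

	#define TASK_ON_RQ_QUEUED	1
	#define TASK_ON_RQ_MIGRATING	2

	struct rq {
		pthread_spinlock_t lock;	/* stands in for rq->lock */
	};

	struct task {
		_Atomic int on_rq;		/* models task_struct::on_rq */
		_Atomic(struct rq *) rq;	/* models task_rq(p) */
	};

	/*
	 * Models __task_rq_lock() after this patch: retry until we hold the
	 * lock of the task's current rq AND the task is not mid-migration.
	 */
	static struct rq *model_task_rq_lock(struct task *p)
	{
		struct rq *rq;

		for (;;) {
			rq = atomic_load(&p->rq);
			pthread_spin_lock(&rq->lock);
			if (rq == atomic_load(&p->rq) &&
			    atomic_load(&p->on_rq) != TASK_ON_RQ_MIGRATING)
				return rq;
			pthread_spin_unlock(&rq->lock);

			while (atomic_load(&p->on_rq) == TASK_ON_RQ_MIGRATING)
				sched_yield();	/* stands in for cpu_relax() */
		}
	}

	/*
	 * Models the migration sequence from the commit message: note that
	 * neither rq lock is held while the task is "in flight".
	 */
	static void model_migrate(struct task *p, struct rq *src, struct rq *dst)
	{
		pthread_spin_lock(&src->lock);
		atomic_store(&p->on_rq, TASK_ON_RQ_MIGRATING);	/* dequeue + mark */
		atomic_store(&p->rq, dst);			/* set_task_cpu() */
		pthread_spin_unlock(&src->lock);

		pthread_spin_lock(&dst->lock);
		atomic_store(&p->on_rq, TASK_ON_RQ_QUEUED);	/* enqueue */
		pthread_spin_unlock(&dst->lock);
	}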
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched/core.c	12
-rw-r--r--	kernel/sched/sched.h	6

2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a02b624fee6c..71b836034912 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -333,9 +333,12 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
 	for (;;) {
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
-		if (likely(rq == task_rq(p)))
+		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
 			return rq;
 		raw_spin_unlock(&rq->lock);
+
+		while (unlikely(task_on_rq_migrating(p)))
+			cpu_relax();
 	}
 }
 
@@ -352,10 +355,13 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 		raw_spin_lock_irqsave(&p->pi_lock, *flags);
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
-		if (likely(rq == task_rq(p)))
+		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
 			return rq;
 		raw_spin_unlock(&rq->lock);
 		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+
+		while (unlikely(task_on_rq_migrating(p)))
+			cpu_relax();
 	}
 }
 
@@ -1678,7 +1684,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	success = 1; /* we're going to change ->state */
 	cpu = task_cpu(p);
 
-	if (task_on_rq_queued(p) && ttwu_remote(p, wake_flags))
+	if (p->on_rq && ttwu_remote(p, wake_flags))
 		goto stat;
 
 #ifdef CONFIG_SMP
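[Editor's note] On the try_to_wake_up() hunk above: the check is deliberately
relaxed from task_on_rq_queued(p) back to the raw p->on_rq test. A task in
the TASK_ON_RQ_MIGRATING state is still runnable and must take the remote
wakeup path; ttwu_remote() then acquires task_rq_lock(), which after this
patch spins until the migration completes. In terms of the userspace model
sketched earlier, the predicate looks like this (a hypothetical helper, not
part of the patch):

	/*
	 * Both TASK_ON_RQ_QUEUED (1) and TASK_ON_RQ_MIGRATING (2) are nonzero,
	 * so a plain on_rq test also catches a task that is in flight between
	 * runqueues -- which is why try_to_wake_up() now checks p->on_rq.
	 */
	static int model_task_on_rq(struct task *p)
	{
		return atomic_load(&p->on_rq) != 0;
	}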
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 26566d0c67ac..aa0f73ba3777 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -17,6 +17,7 @@ struct rq;
 
 /* task_struct::on_rq states: */
 #define TASK_ON_RQ_QUEUED	1
+#define TASK_ON_RQ_MIGRATING	2
 
 extern __read_mostly int scheduler_running;
 
@@ -950,6 +951,11 @@ static inline int task_on_rq_queued(struct task_struct *p)
 	return p->on_rq == TASK_ON_RQ_QUEUED;
 }
 
+static inline int task_on_rq_migrating(struct task_struct *p)
+{
+	return p->on_rq == TASK_ON_RQ_MIGRATING;
+}
+
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(next) do { } while (0)
 #endif
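[Editor's note] To see the spin-until-migrated scheme exclude lockers while a
task is in flight, the userspace model above can be driven by a small
hypothetical harness like the one below (again an illustrative sketch, with
invented names; build with something like cc -pthread model.c):

	#include <stdio.h>

	static struct rq rq_a, rq_b;
	static struct task p;

	/* Repeatedly take and drop the "right" rq lock, as a scheduler user would. */
	static void *locker(void *arg)
	{
		(void)arg;
		for (int i = 0; i < 100000; i++) {
			struct rq *rq = model_task_rq_lock(&p);
			/* critical section: rq == task_rq(p) and p is not migrating */
			pthread_spin_unlock(&rq->lock);
		}
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_spin_init(&rq_a.lock, PTHREAD_PROCESS_PRIVATE);
		pthread_spin_init(&rq_b.lock, PTHREAD_PROCESS_PRIVATE);
		atomic_store(&p.rq, &rq_a);
		atomic_store(&p.on_rq, TASK_ON_RQ_QUEUED);

		pthread_create(&t, NULL, locker, NULL);
		for (int i = 0; i < 100000; i++)	/* bounce p between the two rqs */
			model_migrate(&p, i & 1 ? &rq_b : &rq_a, i & 1 ? &rq_a : &rq_b);
		pthread_join(t, NULL);

		puts("done: every locker saw a stable, non-migrating task");
		return 0;
	}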