author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2011-04-05 11:23:51 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-04-14 02:52:38 -0400
commit		0122ec5b02f766c355b3168df53a6c038a24fa0d (patch)
tree		5fa335e80a0110a14e3b9de98780149283f73bbe
parent		2acca55ed98ad9b9aa25e7e587ebe306c0313dc7 (diff)
sched: Add p->pi_lock to task_rq_lock()
In order to be able to call set_task_cpu() while either holding p->pi_lock or
task_rq(p)->lock we need to hold both locks in order to stabilize task_rq().

This makes task_rq_lock() acquire both locks, and have __task_rq_lock()
validate that p->pi_lock is held. This increases the locking overhead for most
scheduler syscalls but allows reduction of rq->lock contention for some
scheduler hot paths (ttwu).

Reviewed-by: Frank Rowand <frank.rowand@am.sony.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20110405152729.232781355@chello.nl
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	kernel/sched.c	103
1 files changed, 47 insertions, 56 deletions
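
Before the diff itself, a condensed sketch of the locking scheme the patch ends
up with may help. This is a simplified rendering of the task_rq_lock() /
task_rq_unlock() pair shown in the hunks below; the sparse __acquires/__releases
annotations and the surrounding kernel context are omitted, so treat it as
illustrative rather than verbatim kernel code:

	/*
	 * Acquire p->pi_lock first (irqs off), then the rq lock; retry if the
	 * task migrated between the two acquisitions. Once both locks are
	 * held, task_rq(p) is stable.
	 */
	static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
	{
		struct rq *rq;

		for (;;) {
			raw_spin_lock_irqsave(&p->pi_lock, *flags);
			rq = task_rq(p);
			raw_spin_lock(&rq->lock);
			if (likely(rq == task_rq(p)))
				return rq;	/* placement is now pinned */
			/* lost a race against migration: drop both and retry */
			raw_spin_unlock(&rq->lock);
			raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
		}
	}

	/*
	 * Release in the reverse order: rq->lock, then p->pi_lock. Callers
	 * now pass the task as well, e.g. task_rq_unlock(rq, p, &flags).
	 */
	static inline void
	task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
	{
		raw_spin_unlock(&rq->lock);
		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
	}
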
diff --git a/kernel/sched.c b/kernel/sched.c
index 6b269b79c52c..f1551271a685 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -599,7 +599,7 @@ static inline int cpu_of(struct rq *rq)
  * Return the group to which this tasks belongs.
  *
  * We use task_subsys_state_check() and extend the RCU verification
- * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
  * holds that lock for each task it moves into the cgroup. Therefore
  * by holding that lock, we pin the task to the current cgroup.
  */
@@ -609,7 +609,7 @@ static inline struct task_group *task_group(struct task_struct *p)
 	struct cgroup_subsys_state *css;
 
 	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
-			lockdep_is_held(&task_rq(p)->lock));
+			lockdep_is_held(&p->pi_lock));
 	tg = container_of(css, struct task_group, css);
 
 	return autogroup_task_group(p, tg);
@@ -924,23 +924,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 
 /*
- * Check whether the task is waking, we use this to synchronize ->cpus_allowed
- * against ttwu().
- */
-static inline int task_is_waking(struct task_struct *p)
-{
-	return unlikely(p->state == TASK_WAKING);
-}
-
-/*
- * __task_rq_lock - lock the runqueue a given task resides on.
- * Must be called interrupts disabled.
+ * __task_rq_lock - lock the rq @p resides on.
  */
 static inline struct rq *__task_rq_lock(struct task_struct *p)
 	__acquires(rq->lock)
 {
 	struct rq *rq;
 
+	lockdep_assert_held(&p->pi_lock);
+
 	for (;;) {
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
@@ -951,22 +943,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
 }
 
 /*
- * task_rq_lock - lock the runqueue a given task resides on and disable
- * interrupts. Note the ordering: we can safely lookup the task_rq without
- * explicitly disabling preemption.
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
  */
 static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+	__acquires(p->pi_lock)
 	__acquires(rq->lock)
 {
 	struct rq *rq;
 
 	for (;;) {
-		local_irq_save(*flags);
+		raw_spin_lock_irqsave(&p->pi_lock, *flags);
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
 		if (likely(rq == task_rq(p)))
 			return rq;
-		raw_spin_unlock_irqrestore(&rq->lock, *flags);
+		raw_spin_unlock(&rq->lock);
+		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
 	}
 }
 
@@ -976,10 +968,13 @@ static void __task_rq_unlock(struct rq *rq)
 	raw_spin_unlock(&rq->lock);
 }
 
-static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
 	__releases(rq->lock)
+	__releases(p->pi_lock)
 {
-	raw_spin_unlock_irqrestore(&rq->lock, *flags);
+	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
 }
 
 /*
@@ -2175,6 +2170,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	 */
 	WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
 			!(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+
+#ifdef CONFIG_LOCKDEP
+	WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
+				      lockdep_is_held(&task_rq(p)->lock)));
+#endif
 #endif
 
 	trace_sched_migrate_task(p, new_cpu);
@@ -2270,7 +2270,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 		ncsw = 0;
 		if (!match_state || p->state == match_state)
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
-		task_rq_unlock(rq, &flags);
+		task_rq_unlock(rq, p, &flags);
 
 		/*
 		 * If it changed from the expected state, bail out now.
@@ -2652,6 +2652,7 @@ static void __sched_fork(struct task_struct *p)
  */
 void sched_fork(struct task_struct *p, int clone_flags)
 {
+	unsigned long flags;
 	int cpu = get_cpu();
 
 	__sched_fork(p);
@@ -2702,9 +2703,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	 *
 	 * Silence PROVE_RCU.
 	 */
-	rcu_read_lock();
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	set_task_cpu(p, cpu);
-	rcu_read_unlock();
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (likely(sched_info_on()))
@@ -2753,7 +2754,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	set_task_cpu(p, cpu);
 
 	p->state = TASK_RUNNING;
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 #endif
 
 	rq = task_rq_lock(p, &flags);
@@ -2765,7 +2766,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	if (p->sched_class->task_woken)
 		p->sched_class->task_woken(rq, p);
 #endif
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 	put_cpu();
 }
 
@@ -3490,12 +3491,12 @@ void sched_exec(void)
 	    likely(cpu_active(dest_cpu)) && need_migrate_task(p)) {
 		struct migration_arg arg = { p, dest_cpu };
 
-		task_rq_unlock(rq, &flags);
+		task_rq_unlock(rq, p, &flags);
 		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
 		return;
 	}
 unlock:
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 }
 
 #endif
@@ -3532,7 +3533,7 @@ unsigned long long task_delta_exec(struct task_struct *p)
 
 	rq = task_rq_lock(p, &flags);
 	ns = do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ns;
 }
@@ -3550,7 +3551,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 
 	rq = task_rq_lock(p, &flags);
 	ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ns;
 }
@@ -3574,7 +3575,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
 	rq = task_rq_lock(p, &flags);
 	thread_group_cputime(p, &totals);
 	ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ns;
 }
@@ -4693,16 +4694,13 @@ EXPORT_SYMBOL(sleep_on_timeout);
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
-	unsigned long flags;
 	int oldprio, on_rq, running;
 	struct rq *rq;
 	const struct sched_class *prev_class;
 
 	BUG_ON(prio < 0 || prio > MAX_PRIO);
 
-	lockdep_assert_held(&p->pi_lock);
-
-	rq = task_rq_lock(p, &flags);
+	rq = __task_rq_lock(p);
 
 	trace_sched_pi_setprio(p, prio);
 	oldprio = p->prio;
@@ -4727,7 +4725,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
 
 	check_class_changed(rq, p, prev_class, oldprio);
-	task_rq_unlock(rq, &flags);
+	__task_rq_unlock(rq);
 }
 
 #endif
@@ -4775,7 +4773,7 @@ void set_user_nice(struct task_struct *p, long nice)
 		resched_task(rq->curr);
 	}
 out_unlock:
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 }
 EXPORT_SYMBOL(set_user_nice);
 
@@ -5003,20 +5001,17 @@ recheck:
 	/*
 	 * make sure no PI-waiters arrive (or leave) while we are
 	 * changing the priority of the task:
-	 */
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	/*
+	 *
 	 * To be able to change p->policy safely, the appropriate
 	 * runqueue lock must be held.
 	 */
-	rq = __task_rq_lock(p);
+	rq = task_rq_lock(p, &flags);
 
 	/*
 	 * Changing the policy of the stop threads its a very bad idea
 	 */
 	if (p == rq->stop) {
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		return -EINVAL;
 	}
 
@@ -5040,8 +5035,7 @@ recheck:
 		if (rt_bandwidth_enabled() && rt_policy(policy) &&
 				task_group(p)->rt_bandwidth.rt_runtime == 0 &&
 				!task_group_is_autogroup(task_group(p))) {
-			__task_rq_unlock(rq);
-			raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+			task_rq_unlock(rq, p, &flags);
 			return -EPERM;
 		}
 	}
@@ -5050,8 +5044,7 @@ recheck:
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		goto recheck;
 	}
 	on_rq = p->on_rq;
@@ -5073,8 +5066,7 @@ recheck:
 		activate_task(rq, p, 0);
 
 	check_class_changed(rq, p, prev_class, oldprio);
-	__task_rq_unlock(rq);
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+	task_rq_unlock(rq, p, &flags);
 
 	rt_mutex_adjust_pi(p);
 
@@ -5666,7 +5658,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 
 	rq = task_rq_lock(p, &flags);
 	time_slice = p->sched_class->get_rr_interval(rq, p);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	rcu_read_unlock();
 	jiffies_to_timespec(time_slice, &t);
@@ -5889,8 +5881,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	unsigned int dest_cpu;
 	int ret = 0;
 
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	rq = __task_rq_lock(p);
+	rq = task_rq_lock(p, &flags);
 
 	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
 		ret = -EINVAL;
@@ -5918,15 +5909,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	if (need_migrate_task(p)) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
 		tlb_migrate_finish(p->mm);
 		return 0;
 	}
 out:
-	__task_rq_unlock(rq);
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ret;
 }
@@ -5954,6 +5943,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	rq_src = cpu_rq(src_cpu);
 	rq_dest = cpu_rq(dest_cpu);
 
+	raw_spin_lock(&p->pi_lock);
 	double_rq_lock(rq_src, rq_dest);
 	/* Already moved. */
 	if (task_cpu(p) != src_cpu)
@@ -5976,6 +5966,7 @@ done:
 	ret = 1;
 fail:
 	double_rq_unlock(rq_src, rq_dest);
+	raw_spin_unlock(&p->pi_lock);
 	return ret;
 }
 
@@ -8702,7 +8693,7 @@ void sched_move_task(struct task_struct *tsk)
 	if (on_rq)
 		enqueue_task(rq, tsk, 0);
 
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, tsk, &flags);
 }
 #endif /* CONFIG_CGROUP_SCHED */
 