diff options
author | Peter Zijlstra <peterz@infradead.org> | 2017-03-23 10:56:11 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2017-04-04 05:44:06 -0400 |
commit | acd58620e415aee4a43a808d7d2fd87259ee0001 (patch) | |
tree | b0971a53edac32523a6b99b4bd5f15200041634e /kernel/locking/rtmutex.c | |
parent | aa2bfe55366552cb7e93e8709d66e698d79ccc47 (diff) |
sched/rtmutex: Refactor rt_mutex_setprio()
With the introduction of SCHED_DEADLINE the whole notion that priority
is a single number is gone, therefore the @prio argument to
rt_mutex_setprio() doesn't make sense anymore.
So rework the code to pass a pi_task instead.
Note this also fixes a problem with pi_top_task caching; previously we
would not set the pointer (call rt_mutex_update_top_task) if the
priority didn't change, which could lead to a stale pointer.
As for the XXX, I think it's fine to use pi_task->prio, because if it
differs from waiter->prio, a PI chain update is imminent.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: juri.lelli@arm.com
Cc: bigeasy@linutronix.de
Cc: xlpang@redhat.com
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: bristot@redhat.com
Link: http://lkml.kernel.org/r/20170323150216.303827095@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/locking/rtmutex.c')
-rw-r--r-- | kernel/locking/rtmutex.c | 112 |
1 files changed, 30 insertions, 82 deletions
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 4b1015ef0dc7..00b49cdbb4e0 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c | |||
@@ -322,67 +322,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) | |||
322 | RB_CLEAR_NODE(&waiter->pi_tree_entry); | 322 | RB_CLEAR_NODE(&waiter->pi_tree_entry); |
323 | } | 323 | } |
324 | 324 | ||
325 | /* | 325 | static void rt_mutex_adjust_prio(struct task_struct *p) |
326 | * Must hold both p->pi_lock and task_rq(p)->lock. | ||
327 | */ | ||
328 | void rt_mutex_update_top_task(struct task_struct *p) | ||
329 | { | ||
330 | if (!task_has_pi_waiters(p)) { | ||
331 | p->pi_top_task = NULL; | ||
332 | return; | ||
333 | } | ||
334 | |||
335 | p->pi_top_task = task_top_pi_waiter(p)->task; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Calculate task priority from the waiter tree priority | ||
340 | * | ||
341 | * Return task->normal_prio when the waiter tree is empty or when | ||
342 | * the waiter is not allowed to do priority boosting | ||
343 | */ | ||
344 | int rt_mutex_getprio(struct task_struct *task) | ||
345 | { | ||
346 | if (likely(!task_has_pi_waiters(task))) | ||
347 | return task->normal_prio; | ||
348 | |||
349 | return min(task_top_pi_waiter(task)->prio, | ||
350 | task->normal_prio); | ||
351 | } | ||
352 | |||
353 | /* | ||
354 | * Must hold either p->pi_lock or task_rq(p)->lock. | ||
355 | */ | ||
356 | struct task_struct *rt_mutex_get_top_task(struct task_struct *task) | ||
357 | { | ||
358 | return task->pi_top_task; | ||
359 | } | ||
360 | |||
361 | /* | ||
362 | * Called by sched_setscheduler() to get the priority which will be | ||
363 | * effective after the change. | ||
364 | */ | ||
365 | int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) | ||
366 | { | 326 | { |
367 | struct task_struct *top_task = rt_mutex_get_top_task(task); | 327 | struct task_struct *pi_task = NULL; |
368 | 328 | ||
369 | if (!top_task) | 329 | lockdep_assert_held(&p->pi_lock); |
370 | return newprio; | ||
371 | 330 | ||
372 | return min(top_task->prio, newprio); | 331 | if (task_has_pi_waiters(p)) |
373 | } | 332 | pi_task = task_top_pi_waiter(p)->task; |
374 | 333 | ||
375 | /* | 334 | rt_mutex_setprio(p, pi_task); |
376 | * Adjust the priority of a task, after its pi_waiters got modified. | ||
377 | * | ||
378 | * This can be both boosting and unboosting. task->pi_lock must be held. | ||
379 | */ | ||
380 | static void __rt_mutex_adjust_prio(struct task_struct *task) | ||
381 | { | ||
382 | int prio = rt_mutex_getprio(task); | ||
383 | |||
384 | if (task->prio != prio || dl_prio(prio)) | ||
385 | rt_mutex_setprio(task, prio); | ||
386 | } | 335 | } |
387 | 336 | ||
388 | /* | 337 | /* |
@@ -742,7 +691,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
742 | */ | 691 | */ |
743 | rt_mutex_dequeue_pi(task, prerequeue_top_waiter); | 692 | rt_mutex_dequeue_pi(task, prerequeue_top_waiter); |
744 | rt_mutex_enqueue_pi(task, waiter); | 693 | rt_mutex_enqueue_pi(task, waiter); |
745 | __rt_mutex_adjust_prio(task); | 694 | rt_mutex_adjust_prio(task); |
746 | 695 | ||
747 | } else if (prerequeue_top_waiter == waiter) { | 696 | } else if (prerequeue_top_waiter == waiter) { |
748 | /* | 697 | /* |
@@ -758,7 +707,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
758 | rt_mutex_dequeue_pi(task, waiter); | 707 | rt_mutex_dequeue_pi(task, waiter); |
759 | waiter = rt_mutex_top_waiter(lock); | 708 | waiter = rt_mutex_top_waiter(lock); |
760 | rt_mutex_enqueue_pi(task, waiter); | 709 | rt_mutex_enqueue_pi(task, waiter); |
761 | __rt_mutex_adjust_prio(task); | 710 | rt_mutex_adjust_prio(task); |
762 | } else { | 711 | } else { |
763 | /* | 712 | /* |
764 | * Nothing changed. No need to do any priority | 713 | * Nothing changed. No need to do any priority |
@@ -966,7 +915,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
966 | return -EDEADLK; | 915 | return -EDEADLK; |
967 | 916 | ||
968 | raw_spin_lock(&task->pi_lock); | 917 | raw_spin_lock(&task->pi_lock); |
969 | __rt_mutex_adjust_prio(task); | 918 | rt_mutex_adjust_prio(task); |
970 | waiter->task = task; | 919 | waiter->task = task; |
971 | waiter->lock = lock; | 920 | waiter->lock = lock; |
972 | waiter->prio = task->prio; | 921 | waiter->prio = task->prio; |
@@ -988,7 +937,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
988 | rt_mutex_dequeue_pi(owner, top_waiter); | 937 | rt_mutex_dequeue_pi(owner, top_waiter); |
989 | rt_mutex_enqueue_pi(owner, waiter); | 938 | rt_mutex_enqueue_pi(owner, waiter); |
990 | 939 | ||
991 | __rt_mutex_adjust_prio(owner); | 940 | rt_mutex_adjust_prio(owner); |
992 | if (owner->pi_blocked_on) | 941 | if (owner->pi_blocked_on) |
993 | chain_walk = 1; | 942 | chain_walk = 1; |
994 | } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { | 943 | } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { |
@@ -1040,13 +989,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |||
1040 | waiter = rt_mutex_top_waiter(lock); | 989 | waiter = rt_mutex_top_waiter(lock); |
1041 | 990 | ||
1042 | /* | 991 | /* |
1043 | * Remove it from current->pi_waiters. We do not adjust a | 992 | * Remove it from current->pi_waiters and deboost. |
1044 | * possible priority boost right now. We execute wakeup in the | 993 | * |
1045 | * boosted mode and go back to normal after releasing | 994 | * We must in fact deboost here in order to ensure we call |
1046 | * lock->wait_lock. | 995 | * rt_mutex_setprio() to update p->pi_top_task before the |
996 | * task unblocks. | ||
1047 | */ | 997 | */ |
1048 | rt_mutex_dequeue_pi(current, waiter); | 998 | rt_mutex_dequeue_pi(current, waiter); |
1049 | __rt_mutex_adjust_prio(current); | 999 | rt_mutex_adjust_prio(current); |
1050 | 1000 | ||
1051 | /* | 1001 | /* |
1052 | * As we are waking up the top waiter, and the waiter stays | 1002 | * As we are waking up the top waiter, and the waiter stays |
@@ -1058,9 +1008,19 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |||
1058 | */ | 1008 | */ |
1059 | lock->owner = (void *) RT_MUTEX_HAS_WAITERS; | 1009 | lock->owner = (void *) RT_MUTEX_HAS_WAITERS; |
1060 | 1010 | ||
1061 | raw_spin_unlock(¤t->pi_lock); | 1011 | /* |
1062 | 1012 | * We deboosted before waking the top waiter task such that we don't | |
1013 | * run two tasks with the 'same' priority (and ensure the | ||
1014 | * p->pi_top_task pointer points to a blocked task). This however can | ||
1015 | * lead to priority inversion if we would get preempted after the | ||
1016 | * deboost but before waking our donor task, hence the preempt_disable() | ||
1017 | * before unlock. | ||
1018 | * | ||
1019 | * Pairs with preempt_enable() in rt_mutex_postunlock(); | ||
1020 | */ | ||
1021 | preempt_disable(); | ||
1063 | wake_q_add(wake_q, waiter->task); | 1022 | wake_q_add(wake_q, waiter->task); |
1023 | raw_spin_unlock(¤t->pi_lock); | ||
1064 | } | 1024 | } |
1065 | 1025 | ||
1066 | /* | 1026 | /* |
@@ -1095,7 +1055,7 @@ static void remove_waiter(struct rt_mutex *lock, | |||
1095 | if (rt_mutex_has_waiters(lock)) | 1055 | if (rt_mutex_has_waiters(lock)) |
1096 | rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); | 1056 | rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); |
1097 | 1057 | ||
1098 | __rt_mutex_adjust_prio(owner); | 1058 | rt_mutex_adjust_prio(owner); |
1099 | 1059 | ||
1100 | /* Store the lock on which owner is blocked or NULL */ | 1060 | /* Store the lock on which owner is blocked or NULL */ |
1101 | next_lock = task_blocked_on_lock(owner); | 1061 | next_lock = task_blocked_on_lock(owner); |
@@ -1134,8 +1094,7 @@ void rt_mutex_adjust_pi(struct task_struct *task) | |||
1134 | raw_spin_lock_irqsave(&task->pi_lock, flags); | 1094 | raw_spin_lock_irqsave(&task->pi_lock, flags); |
1135 | 1095 | ||
1136 | waiter = task->pi_blocked_on; | 1096 | waiter = task->pi_blocked_on; |
1137 | if (!waiter || (waiter->prio == task->prio && | 1097 | if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) { |
1138 | !dl_prio(task->prio))) { | ||
1139 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | 1098 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); |
1140 | return; | 1099 | return; |
1141 | } | 1100 | } |
@@ -1389,17 +1348,6 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |||
1389 | * Queue the next waiter for wakeup once we release the wait_lock. | 1348 | * Queue the next waiter for wakeup once we release the wait_lock. |
1390 | */ | 1349 | */ |
1391 | mark_wakeup_next_waiter(wake_q, lock); | 1350 | mark_wakeup_next_waiter(wake_q, lock); |
1392 | |||
1393 | /* | ||
1394 | * We should deboost before waking the top waiter task such that | ||
1395 | * we don't run two tasks with the 'same' priority. This however | ||
1396 | * can lead to prio-inversion if we would get preempted after | ||
1397 | * the deboost but before waking our high-prio task, hence the | ||
1398 | * preempt_disable before unlock. Pairs with preempt_enable() in | ||
1399 | * rt_mutex_postunlock(); | ||
1400 | */ | ||
1401 | preempt_disable(); | ||
1402 | |||
1403 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | 1351 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); |
1404 | 1352 | ||
1405 | return true; /* call rt_mutex_postunlock() */ | 1353 | return true; /* call rt_mutex_postunlock() */ |