author		Gregory Haskins <ghaskins@novell.com>	2008-12-29 09:39:51 -0500
committer	Gregory Haskins <ghaskins@novell.com>	2008-12-29 09:39:51 -0500
commit		8f45e2b516201d1bf681e6026fa5276385def565
tree		b50d5cc2d6932d1f33f222bdf6052cfa32cddc8c
parent		7e96fa5875d4a9be18d74d3ca7b90518d05bc426
sched: make double-lock-balance fair
double_lock_balance() currently favors logically lower cpus since they often do not have to release their own lock to acquire a second lock. The result is that logically higher cpus can get starved when there is a lot of pressure on the RQs. This can result in higher latencies on higher cpu-ids.

This patch makes the algorithm more fair by forcing all paths to release both locks before acquiring them again. Since callsites to double_lock_balance() already consider it a potential preemption/reschedule point, they have the proper logic to recheck for atomicity violations.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
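The fairness argument relies on callers already treating double_lock_balance() as a point where both locks may be dropped, so they revalidate their state once it returns. Below is a minimal userspace sketch of that drop-relock-recheck pattern together with the patch's fair policy, using pthread mutexes as stand-ins for the runqueue spinlocks; the struct layout and the nr_running check are illustrative, not taken from sched.c.

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins for two runqueues; pthread mutexes replace rq->lock. */
struct rq {
	pthread_mutex_t lock;
	int nr_running;
};

static struct rq rq_a = { PTHREAD_MUTEX_INITIALIZER, 3 };
static struct rq rq_b = { PTHREAD_MUTEX_INITIALIZER, 1 };

/*
 * Fair policy from the patch: unconditionally drop this_rq->lock, then take
 * both locks in a fixed (address) order.  Returning 1 means "the caller's
 * lock was released, so its earlier checks may be stale".
 */
static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
{
	pthread_mutex_unlock(&this_rq->lock);
	if (this_rq < busiest) {
		pthread_mutex_lock(&this_rq->lock);
		pthread_mutex_lock(&busiest->lock);
	} else {
		pthread_mutex_lock(&busiest->lock);
		pthread_mutex_lock(&this_rq->lock);
	}
	return 1;
}

int main(void)
{
	struct rq *this_rq = &rq_a;
	struct rq *busiest = &rq_b;

	pthread_mutex_lock(&this_rq->lock);

	/*
	 * Caller-side pattern: after the double lock, recheck the condition
	 * that motivated the balance, because it may have changed while the
	 * lock was dropped.
	 */
	if (double_lock_balance(this_rq, busiest) && busiest->nr_running < 1)
		printf("nothing left to pull; abort this balance pass\n");

	pthread_mutex_unlock(&busiest->lock);
	pthread_mutex_unlock(&this_rq->lock);
	return 0;
}

Because every path releases and re-takes both locks, no cpu gets to keep its own lock while waiting for the other, which is what removes the bias toward lower cpu-ids.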
-rw-r--r--	kernel/sched.c	51
1 file changed, 44 insertions(+), 7 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 94d9a6c5ff94..8fca364f3593 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1608,21 +1608,42 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #endif
 
+#ifdef CONFIG_PREEMPT
+
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ * way at the expense of forcing extra atomic operations in all
+ * invocations. This assures that the double_lock is acquired using the
+ * same underlying policy as the spinlock_t on this architecture, which
+ * reduces latency compared to the unfair variant below. However, it
+ * also adds more overhead and therefore may reduce throughput.
  */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	spin_unlock(&this_rq->lock);
+	double_rq_lock(this_rq, busiest);
+
+	return 1;
+}
+
+#else
+/*
+ * Unfair double_lock_balance: Optimizes throughput at the expense of
+ * latency by eliminating extra atomic operations when the locks are
+ * already in proper order on entry. This favors lower cpu-ids and will
+ * grant the double lock to lower cpus over higher ids under contention,
+ * regardless of entry order into the function.
+ */
+static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(this_rq->lock)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
 	int ret = 0;
 
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work good under rq->lock */
-		spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
 	if (unlikely(!spin_trylock(&busiest->lock))) {
 		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
@@ -1635,6 +1656,22 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	return ret;
 }
 
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+{
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+
+	return _double_lock_balance(this_rq, busiest);
+}
+
 static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(busiest->lock)
 {