Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c | 89
1 file changed, 78 insertions(+), 11 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index deb5ac8c12f3..dd1a1466c1e6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -464,11 +464,15 @@ struct rt_rq {
 	struct rt_prio_array active;
 	unsigned long rt_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	int highest_prio; /* highest queued rt task prio */
+	struct {
+		int curr; /* highest queued rt task prio */
+		int next; /* next highest */
+	} highest_prio;
 #endif
 #ifdef CONFIG_SMP
 	unsigned long rt_nr_migratory;
 	int overloaded;
+	struct plist_head pushable_tasks;
 #endif
 	int rt_throttled;
 	u64 rt_time;
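The single highest_prio field becomes a two-entry cache: .curr holds the priority of the highest-priority queued RT task and .next the second highest, so the push/pull logic can judge whether a migration is worthwhile without rescanning the queue. A minimal userspace sketch of the caching rule on insert (prio_cache and its helpers are hypothetical names, and the real kernel must additionally recompute .next on dequeue by scanning the priority bitmap):

#include <stdio.h>

struct prio_cache {
	int curr;	/* highest queued prio (lower value = higher prio) */
	int next;	/* next highest */
};

static void prio_cache_init(struct prio_cache *pc, int max_prio)
{
	pc->curr = max_prio;	/* "empty" sentinel, like MAX_RT_PRIO */
	pc->next = max_prio;
}

static void prio_cache_insert(struct prio_cache *pc, int prio)
{
	if (prio < pc->curr) {		/* new highest: old highest demotes */
		pc->next = pc->curr;
		pc->curr = prio;
	} else if (prio < pc->next) {	/* new second-highest */
		pc->next = prio;
	}
}

int main(void)
{
	struct prio_cache pc;

	prio_cache_init(&pc, 100);	/* 100 plays the role of MAX_RT_PRIO */
	prio_cache_insert(&pc, 40);
	prio_cache_insert(&pc, 10);
	prio_cache_insert(&pc, 25);
	printf("curr=%d next=%d\n", pc.curr, pc.next);	/* curr=10 next=25 */
	return 0;
}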
@@ -1607,21 +1611,42 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #endif
 
+#ifdef CONFIG_PREEMPT
+
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ * way at the expense of forcing extra atomic operations in all
+ * invocations. This assures that the double_lock is acquired using the
+ * same underlying policy as the spinlock_t on this architecture, which
+ * reduces latency compared to the unfair variant below. However, it
+ * also adds more overhead and therefore may reduce throughput.
  */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	spin_unlock(&this_rq->lock);
+	double_rq_lock(this_rq, busiest);
+
+	return 1;
+}
+
+#else
+/*
+ * Unfair double_lock_balance: Optimizes throughput at the expense of
+ * latency by eliminating extra atomic operations when the locks are
+ * already in proper order on entry. This favors lower cpu-ids and will
+ * grant the double lock to lower cpus over higher ids under contention,
+ * regardless of entry order into the function.
+ */
+static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(this_rq->lock)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
 	int ret = 0;
 
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work good under rq->lock */
-		spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
 	if (unlikely(!spin_trylock(&busiest->lock))) {
 		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
@@ -1634,6 +1659,22 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	return ret;
 }
 
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+{
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+
+	return _double_lock_balance(this_rq, busiest);
+}
+
 static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(busiest->lock)
 {
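Both variants solve the same deadlock problem: a CPU already holding this_rq->lock must also take busiest->lock, and two CPUs doing this against each other would deadlock without a global acquisition order. The CONFIG_PREEMPT variant always drops the held lock and retakes both through double_rq_lock(), which locks in a fixed order; the unfair variant first attempts a trylock and only reorders when that fails. A self-contained pthreads sketch of the unfair path (double_lock is an illustrative name, not a kernel API):

#include <pthread.h>
#include <stdint.h>

/*
 * Returns 1 if *held was dropped and retaken, so the caller knows any
 * state it observed under that lock must be revalidated.
 */
static int double_lock(pthread_mutex_t *held, pthread_mutex_t *second)
{
	int released = 0;

	if (pthread_mutex_trylock(second) != 0) {
		if ((uintptr_t)second < (uintptr_t)held) {
			/*
			 * Locks are in the wrong global order: drop the
			 * held lock and retake both, lowest address first,
			 * so no two threads can ever wait on each other.
			 */
			pthread_mutex_unlock(held);
			pthread_mutex_lock(second);
			pthread_mutex_lock(held);
			released = 1;
		} else {
			/* Already in order: a blocking lock is safe. */
			pthread_mutex_lock(second);
		}
	}
	return released;
}

The return value mirrors ret in the kernel code: it tells the caller whether the held lock was momentarily dropped, i.e. whether the runqueue state must be re-examined.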
@@ -2445,6 +2486,8 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
 #endif
+	plist_node_init(&p->pushable_tasks, MAX_PRIO);
+
 	put_cpu();
 }
 
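p->pushable_tasks is the plist node through which an RT task can be queued on its runqueue's pushable_tasks list; initializing it with MAX_PRIO at fork marks it as not yet queued at any meaningful priority. A plist keeps entries sorted by priority, so the best push candidate is always at the head. A toy sorted insert in plain C conveys the idea (hypothetical names; the real implementation is <linux/plist.h>, and the enqueue logic lives in sched_rt.c, outside this diff):

struct pnode {
	int prio;		/* lower value = higher priority */
	struct pnode *next;
};

/*
 * Insert keeping the list sorted by prio; equal priorities queue
 * FIFO behind existing entries, and the head stays the best candidate.
 */
static void pnode_insert(struct pnode **head, struct pnode *n)
{
	while (*head && (*head)->prio <= n->prio)
		head = &(*head)->next;
	n->next = *head;
	*head = n;
}

In the kernel proper, plist_head_init() (seen in the init_rt_rq() hunk below) also records the protecting spinlock so debug builds can check it is held.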
@@ -2585,6 +2628,12 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 {
 	struct mm_struct *mm = rq->prev_mm;
 	long prev_state;
+#ifdef CONFIG_SMP
+	int post_schedule = 0;
+
+	if (current->sched_class->needs_post_schedule)
+		post_schedule = current->sched_class->needs_post_schedule(rq);
+#endif
 
 	rq->prev_mm = NULL;
 
@@ -2603,7 +2652,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
-	if (current->sched_class->post_schedule)
+	if (post_schedule)
 		current->sched_class->post_schedule(rq);
 #endif
 
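The two finish_task_switch() hunks work together: needs_post_schedule() is sampled at the top of the function, while the runqueue lock is still held for the switch, and the cached post_schedule flag is tested only after finish_lock_switch() has released it. Re-reading current->sched_class at that later point would race with the task changing class or migrating. The general pattern, reduced to an illustrative userspace form (none of these names are kernel API):

#include <pthread.h>
#include <stdbool.h>

struct rq_like {
	pthread_mutex_t lock;
	bool needs_followup;	/* may change once the lock is dropped */
};

static void followup_work(struct rq_like *rq) { (void)rq; /* ... */ }

static void finish_switch(struct rq_like *rq)
{
	bool do_followup;

	pthread_mutex_lock(&rq->lock);
	do_followup = rq->needs_followup;	/* sample under the lock */
	pthread_mutex_unlock(&rq->lock);

	/*
	 * Act on the snapshot taken under the lock rather than
	 * re-reading state another CPU may have changed since.
	 */
	if (do_followup)
		followup_work(rq);
}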
@@ -2984,6 +3033,16 @@ next:
 	pulled++;
 	rem_load_move -= p->se.load.weight;
 
+#ifdef CONFIG_PREEMPT
+	/*
+	 * NEWIDLE balancing is a source of latency, so preemptible kernels
+	 * will stop after the first task is pulled to minimize the critical
+	 * section.
+	 */
+	if (idle == CPU_NEWLY_IDLE)
+		goto out;
+#endif
+
 	/*
 	 * We only want to steal up to the prescribed amount of weighted load.
 	 */
@@ -3030,9 +3089,15 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 				sd, idle, all_pinned, &this_best_prio);
 		class = class->next;
 
+#ifdef CONFIG_PREEMPT
+		/*
+		 * NEWIDLE balancing is a source of latency, so preemptible
+		 * kernels will stop after the first task is pulled to minimize
+		 * the critical section.
+		 */
 		if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
 			break;
-
+#endif
 	} while (class && max_load_move > total_load_moved);
 
 	return total_load_moved > 0;
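Both load-balancing hunks apply the same policy: on CONFIG_PREEMPT kernels, newly-idle balancing bails out after moving a single task, trading some throughput for a shorter critical section, since the runqueue locks are held across the pull. The shape of that early exit, as a self-contained sketch (LATENCY_SENSITIVE stands in for CONFIG_PREEMPT; all names are illustrative):

#define LATENCY_SENSITIVE 1	/* stands in for CONFIG_PREEMPT */

static int pull_tasks(int newly_idle, int available)
{
	int pulled = 0;

	while (available-- > 0) {
		pulled++;		/* "move one task" */
#if LATENCY_SENSITIVE
		/*
		 * Bound the lock hold time: one task is enough to leave
		 * the newly-idle state, so get out quickly.
		 */
		if (newly_idle)
			break;
#endif
	}
	return pulled;
}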
@@ -8201,11 +8266,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	__set_bit(MAX_RT_PRIO, array->bitmap);
 
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	rt_rq->highest_prio = MAX_RT_PRIO;
+	rt_rq->highest_prio.curr = MAX_RT_PRIO;
+	rt_rq->highest_prio.next = MAX_RT_PRIO;
 #endif
 #ifdef CONFIG_SMP
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
+	plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
 #endif
 
 	rt_rq->rt_time = 0;