sched: use a 2-d bitmap for searching lowest-pri CPU

The current code use a linear algorithm which causes scaling issues on larger SMP machines. This patch replaces that algorithm with a 2-dimensional bitmap to reduce latencies in the wake-up path. Signed-off-by: Gregory Haskins <ghaskins@novell.com> Acked-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
author: Gregory Haskins <ghaskins@novell.com> 2008-05-12 15:21:01 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-06-06 09:19:28 -0400
commit: 6e0534f278199f1e3dd1049b9bc19a7a5b87ada1 (patch)
tree: 25f4da14ec32927742db9f599ac779b4e83d1763 /kernel/sched_rt.c
parent: f333fdc9098b71e2687e4e9b6349fcb352960d66 (diff)
1 files changed, 21 insertions, 77 deletions
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index fefed39fafd..44b06d75416 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -391,8 +391,11 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
        WARN_ON(!rt_prio(rt_se_prio(rt_se)));
        rt_rq->rt_nr_running++;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-        if (rt_se_prio(rt_se) < rt_rq->highest_prio)
+        if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+                struct rq *rq = rq_of_rt_rq(rt_rq);
                rt_rq->highest_prio = rt_se_prio(rt_se);
+                cpupri_set(&rq->rd->cpupri, rq->cpu, rt_se_prio(rt_se));
+        }
 #endif
 #ifdef CONFIG_SMP
        if (rt_se->nr_cpus_allowed > 1) {
@@ -416,6 +419,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 static inline
 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+#ifdef CONFIG_SMP
+        int highest_prio = rt_rq->highest_prio;
+#endif
        WARN_ON(!rt_prio(rt_se_prio(rt_se)));
        WARN_ON(!rt_rq->rt_nr_running);
        rt_rq->rt_nr_running--;
@@ -439,6 +446,11 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
                rq->rt.rt_nr_migratory--;
        }
+        if (rt_rq->highest_prio != highest_prio) {
+                struct rq *rq = rq_of_rt_rq(rt_rq);
+                cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio);
+        }
        update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -763,73 +775,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
 static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
-static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
-{
-        int       lowest_prio = -1;
-        int       lowest_cpu  = -1;
-        int       count       = 0;
-        int       cpu;
-        cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
-        /*
-         * Scan each rq for the lowest prio.
-         */
-        for_each_cpu_mask(cpu, *lowest_mask) {
-                struct rq *rq = cpu_rq(cpu);
-                /* We look for lowest RT prio or non-rt CPU */
-                if (rq->rt.highest_prio >= MAX_RT_PRIO) {
-                        /*
-                         * if we already found a low RT queue
-                         * and now we found this non-rt queue
-                         * clear the mask and set our bit.
-                         * Otherwise just return the queue as is
-                         * and the count==1 will cause the algorithm
-                         * to use the first bit found.
-                         */
-                        if (lowest_cpu != -1) {
-                                cpus_clear(*lowest_mask);
-                                cpu_set(rq->cpu, *lowest_mask);
-                        }
-                        return 1;
-                }
-                /* no locking for now */
-                if ((rq->rt.highest_prio > task->prio)
-                    && (rq->rt.highest_prio >= lowest_prio)) {
-                        if (rq->rt.highest_prio > lowest_prio) {
-                                /* new low - clear old data */
-                                lowest_prio = rq->rt.highest_prio;
-                                lowest_cpu = cpu;
-                                count = 0;
-                        }
-                        count++;
-                } else
-                        cpu_clear(cpu, *lowest_mask);
-        }
-        /*
-         * Clear out all the set bits that represent
-         * runqueues that were of higher prio than
-         * the lowest_prio.
-         */
-        if (lowest_cpu > 0) {
-                /*
-                 * Perhaps we could add another cpumask op to
-                 * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
-                 * Then that could be optimized to use memset and such.
-                 */
-                for_each_cpu_mask(cpu, *lowest_mask) {
-                        if (cpu >= lowest_cpu)
-                                break;
-                        cpu_clear(cpu, *lowest_mask);
-                }
-        }
-        return count;
-}
 static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 {
        int first;
@@ -851,17 +796,12 @@ static int find_lowest_rq(struct task_struct *task)
        cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
        int this_cpu = smp_processor_id();
        int cpu      = task_cpu(task);
-        int count    = find_lowest_cpus(task, lowest_mask);
-        if (!count)
+        if (task->rt.nr_cpus_allowed == 1)
-                return -1; /* No targets found */
+                return -1; /* No other targets possible */
-        /*
+        if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
-         * There is no sense in performing an optimal search if only one
+                return -1; /* No targets found */
-         * target is found.
-         */
-        if (count == 1)
-                return first_cpu(*lowest_mask);
        /*
         * At this point we have built a mask of cpus representing the
@@ -1218,6 +1158,8 @@ static void join_domain_rt(struct rq *rq)
 {
        if (rq->rt.overloaded)
                rt_set_overload(rq);
+        cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
 }
 /* Assumes rq->lock is held */
@@ -1225,6 +1167,8 @@ static void leave_domain_rt(struct rq *rq)
 {
        if (rq->rt.overloaded)
                rt_clear_overload(rq);
+        cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
 }
 /*
author	Gregory Haskins <ghaskins@novell.com>	2008-05-12 15:21:01 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-06-06 09:19:28 -0400
commit	6e0534f278199f1e3dd1049b9bc19a7a5b87ada1 (patch)
tree	25f4da14ec32927742db9f599ac779b4e83d1763 /kernel/sched_rt.c
parent	f333fdc9098b71e2687e4e9b6349fcb352960d66 (diff)

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index fefed39fafd..44b06d75416 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c
@@ -391,8 +391,11 @@ void inc_rt_tasks(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
391	WARN_ON(!rt_prio(rt_se_prio(rt_se)));	391	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
392	rt_rq->rt_nr_running++;	392	rt_rq->rt_nr_running++;
393	#if defined CONFIG_SMP \|\| defined CONFIG_RT_GROUP_SCHED	393	#if defined CONFIG_SMP \|\| defined CONFIG_RT_GROUP_SCHED
394	if (rt_se_prio(rt_se) < rt_rq->highest_prio)	394	if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
		395	struct rq *rq = rq_of_rt_rq(rt_rq);
395	rt_rq->highest_prio = rt_se_prio(rt_se);	396	rt_rq->highest_prio = rt_se_prio(rt_se);
		397	cpupri_set(&rq->rd->cpupri, rq->cpu, rt_se_prio(rt_se));
		398	}
396	#endif	399	#endif
397	#ifdef CONFIG_SMP	400	#ifdef CONFIG_SMP
398	if (rt_se->nr_cpus_allowed > 1) {	401	if (rt_se->nr_cpus_allowed > 1) {
@@ -416,6 +419,10 @@ void inc_rt_tasks(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
416	static inline	419	static inline
417	void dec_rt_tasks(struct sched_rt_entity rt_se, struct rt_rq rt_rq)	420	void dec_rt_tasks(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
418	{	421	{
		422	#ifdef CONFIG_SMP
		423	int highest_prio = rt_rq->highest_prio;
		424	#endif
		425
419	WARN_ON(!rt_prio(rt_se_prio(rt_se)));	426	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
420	WARN_ON(!rt_rq->rt_nr_running);	427	WARN_ON(!rt_rq->rt_nr_running);
421	rt_rq->rt_nr_running--;	428	rt_rq->rt_nr_running--;
@@ -439,6 +446,11 @@ void dec_rt_tasks(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
439	rq->rt.rt_nr_migratory--;	446	rq->rt.rt_nr_migratory--;
440	}	447	}
441		448
		449	if (rt_rq->highest_prio != highest_prio) {
		450	struct rq *rq = rq_of_rt_rq(rt_rq);
		451	cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio);
		452	}
		453
442	update_rt_migration(rq_of_rt_rq(rt_rq));	454	update_rt_migration(rq_of_rt_rq(rt_rq));
443	#endif /* CONFIG_SMP */	455	#endif /* CONFIG_SMP */
444	#ifdef CONFIG_RT_GROUP_SCHED	456	#ifdef CONFIG_RT_GROUP_SCHED
@@ -763,73 +775,6 @@ static struct task_struct pick_next_highest_task_rt(struct rq rq, int cpu)
763		775
764	static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);	776	static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
765		777
766	static int find_lowest_cpus(struct task_struct task, cpumask_t lowest_mask)
767	{
768	int lowest_prio = -1;
769	int lowest_cpu = -1;
770	int count = 0;
771	int cpu;
772
773	cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
774
775	/*
776	* Scan each rq for the lowest prio.
777	*/
778	for_each_cpu_mask(cpu, *lowest_mask) {
779	struct rq *rq = cpu_rq(cpu);
780
781	/* We look for lowest RT prio or non-rt CPU */
782	if (rq->rt.highest_prio >= MAX_RT_PRIO) {
783	/*
784	* if we already found a low RT queue
785	* and now we found this non-rt queue
786	* clear the mask and set our bit.
787	* Otherwise just return the queue as is
788	* and the count==1 will cause the algorithm
789	* to use the first bit found.
790	*/
791	if (lowest_cpu != -1) {
792	cpus_clear(*lowest_mask);
793	cpu_set(rq->cpu, *lowest_mask);
794	}
795	return 1;
796	}
797
798	/* no locking for now */
799	if ((rq->rt.highest_prio > task->prio)
800	&& (rq->rt.highest_prio >= lowest_prio)) {
801	if (rq->rt.highest_prio > lowest_prio) {
802	/* new low - clear old data */
803	lowest_prio = rq->rt.highest_prio;
804	lowest_cpu = cpu;
805	count = 0;
806	}
807	count++;
808	} else
809	cpu_clear(cpu, *lowest_mask);
810	}
811
812	/*
813	* Clear out all the set bits that represent
814	* runqueues that were of higher prio than
815	* the lowest_prio.
816	*/
817	if (lowest_cpu > 0) {
818	/*
819	* Perhaps we could add another cpumask op to
820	* zero out bits. Like cpu_zero_bits(cpumask, nrbits);
821	* Then that could be optimized to use memset and such.
822	*/
823	for_each_cpu_mask(cpu, *lowest_mask) {
824	if (cpu >= lowest_cpu)
825	break;
826	cpu_clear(cpu, *lowest_mask);
827	}
828	}
829
830	return count;
831	}
832
833	static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)	778	static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
834	{	779	{
835	int first;	780	int first;
@@ -851,17 +796,12 @@ static int find_lowest_rq(struct task_struct *task)
851	cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);	796	cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
852	int this_cpu = smp_processor_id();	797	int this_cpu = smp_processor_id();
853	int cpu = task_cpu(task);	798	int cpu = task_cpu(task);
854	int count = find_lowest_cpus(task, lowest_mask);
855		799
856	if (!count)	800	if (task->rt.nr_cpus_allowed == 1)
857	return -1; /* No targets found */	801	return -1; /* No other targets possible */
858		802
859	/*	803	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
860	* There is no sense in performing an optimal search if only one	804	return -1; /* No targets found */
861	* target is found.
862	*/
863	if (count == 1)
864	return first_cpu(*lowest_mask);
865		805
866	/*	806	/*
867	* At this point we have built a mask of cpus representing the	807	* At this point we have built a mask of cpus representing the
@@ -1218,6 +1158,8 @@ static void join_domain_rt(struct rq *rq)
1218	{	1158	{
1219	if (rq->rt.overloaded)	1159	if (rq->rt.overloaded)
1220	rt_set_overload(rq);	1160	rt_set_overload(rq);
		1161
		1162	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
1221	}	1163	}
1222		1164
1223	/* Assumes rq->lock is held */	1165	/* Assumes rq->lock is held */
@@ -1225,6 +1167,8 @@ static void leave_domain_rt(struct rq *rq)
1225	{	1167	{
1226	if (rq->rt.overloaded)	1168	if (rq->rt.overloaded)
1227	rt_clear_overload(rq);	1169	rt_clear_overload(rq);
		1170
		1171	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
1228	}	1172	}
1229		1173
1230	/*	1174	/*