From dd9b238c1c53ddcdb10cbb2548f38543d49ad70a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 19 Mar 2012 21:03:46 +0100
Subject: MAINTAINERS: Update email address for SCHEDULER and PERF EVENTS

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3321d75c6c7f..1ad6a06b0180 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5112,7 +5112,7 @@ F:	kernel/delayacct.c
 PERFORMANCE EVENTS SUBSYSTEM
 M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
 M:	Paul Mackerras <paulus@samba.org>
-M:	Ingo Molnar <mingo@elte.hu>
+M:	Ingo Molnar <mingo@redhat.com>
 M:	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
 S:	Supported
@@ -5736,7 +5736,7 @@ S:	Maintained
 F:	drivers/watchdog/sc1200wdt.c
 
 SCHEDULER
-M:	Ingo Molnar <mingo@elte.hu>
+M:	Ingo Molnar <mingo@redhat.com>
 M:	Peter Zijlstra <peterz@infradead.org>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core
 S:	Maintained
-- 
cgit v1.2.2


From 6c16a6dcb05e51ace340ff7bc6dbe647f1593528 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 21 Mar 2012 13:07:16 -0700
Subject: sched: Fix compiler warning about declared inline after use

kernel/sched/fair.c:420: warning: 'account_cfs_rq_runtime' declared inline after being called
kernel/sched/fair.c:420: warning: previous declaration of 'account_cfs_rq_runtime' was here
kernel/sched/fair.c:1165: warning: 'return_cfs_rq_runtime' declared inlineafter being called
kernel/sched/fair.c:1165: warning: previous declaration of 'return_cfs_rq_runtime' was here

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20120321200717.49BB4A024E@akpm.mtv.corp.google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 11f3979bad2a..258f430d71a5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -416,8 +416,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
 
-static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-				   unsigned long delta_exec);
+static __always_inline
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec);
 
 /**************************************************************
  * Scheduling class tree data structure manipulation methods:
@@ -1162,7 +1162,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		__clear_buddies_skip(se);
 }
 
-static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
 
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -1546,8 +1546,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
 		resched_task(rq_of(cfs_rq)->curr);
 }
 
-static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-						   unsigned long delta_exec)
+static __always_inline
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec)
 {
 	if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled)
 		return;
@@ -2073,11 +2073,11 @@ void unthrottle_offline_cfs_rqs(struct rq *rq)
 }
 
 #else /* CONFIG_CFS_BANDWIDTH */
-static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-				     unsigned long delta_exec) {}
+static __always_inline
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {}
 static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
-static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 
 static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
 {
-- 
cgit v1.2.2


From bc758133ed73d4b06952bec21da23e28e62bf3ba Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 26 Mar 2012 13:16:15 +0200
Subject: sched/x86/smp: Do not enable IRQs over calibrate_delay()

We should not ever enable IRQs until we're fully set up. This opens up
a window where interrupts can hit the cpu and interrupts can do
wakeups, wakeups need state that isn't set-up yet, in particular this
cpu isn't elegible to run tasks, so if any cpu-affine task that got
created in CPU_UP_PREPARE manages to get a wakeup, its affinity mask
will get broken and we'll run into lots of 'interesting' problems.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-yaezmlbriluh166tfkgni22m@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/smpboot.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 58f78165d308..89571a0c4a49 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -219,14 +219,9 @@ static void __cpuinit smp_callin(void)
 	 * Update loops_per_jiffy in cpu_data. Previous call to
 	 * smp_store_cpu_info() stored a value that is close but not as
 	 * accurate as the value just calculated.
-	 *
-	 * Need to enable IRQs because it can take longer and then
-	 * the NMI watchdog might kill us.
 	 */
-	local_irq_enable();
 	calibrate_delay();
 	cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
-	local_irq_disable();
 	pr_debug("Stack at about %p\n", &cpuid);
 
 	/*
-- 
cgit v1.2.2


From 2baab4e90495ebc9826c93f79d74d6e60a828d24 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 20 Mar 2012 15:57:01 +0100
Subject: sched: Fix select_fallback_rq() vs cpu_active/cpu_online

Commit 5fbd036b55 ("sched: Cleanup cpu_active madness"), which was
supposed to finally sort the cpu_active mess, instead uncovered more.

Since CPU_STARTING is ran before setting the cpu online, there's a
(small) window where the cpu has active,!online.

If during this time there's a wakeup of a task that used to reside on
that cpu select_task_rq() will use select_fallback_rq() to compute an
alternative cpu to run on since we find !online.

select_fallback_rq() however will compute the new cpu against
cpu_active, this means that it can return the same cpu it started out
with, the !online one, since that cpu is in fact marked active.

This results in us trying to scheduling a task on an offline cpu and
triggering a WARN in the IPI code.

The solution proposed by Chuansheng Liu of setting cpu_active in
set_cpu_online() is buggy, firstly not all archs actually use
set_cpu_online(), secondly, not all archs call set_cpu_online() with
IRQs disabled, this means we would introduce either the same race or
the race from fd8a7de17 ("x86: cpu-hotplug: Prevent softirq wakeup on
wrong CPU") -- albeit much narrower.

[ By setting online first and active later we have a window of
  online,!active, fresh and bound kthreads have task_cpu() of 0 and
  since cpu0 isn't in tsk_cpus_allowed() we end up in
  select_fallback_rq() which excludes !active, resulting in a reset
  of ->cpus_allowed and the thread running all over the place. ]

The solution is to re-work select_fallback_rq() to require active
_and_ online. This makes the active,!online case work as expected,
OTOH archs running CPU_STARTING after setting online are now
vulnerable to the issue from fd8a7de17 -- these are alpha and
blackfin.

Reported-by: Chuansheng Liu <chuansheng.liu@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: linux-alpha@vger.kernel.org
Link: http://lkml.kernel.org/n/tip-hubqk1i10o4dpvlm06gq7v6j@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuset.h |  6 ++---
 kernel/cpuset.c        | 20 ++++------------
 kernel/sched/core.c    | 62 +++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index e9eaec522655..e0ffaf061ab7 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -22,7 +22,7 @@ extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
 extern void cpuset_update_active_cpus(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
-extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
+extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
@@ -144,10 +144,8 @@ static inline void cpuset_cpus_allowed(struct task_struct *p,
 	cpumask_copy(mask, cpu_possible_mask);
 }
 
-static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
+static inline void cpuset_cpus_allowed_fallback(struct task_struct *p)
 {
-	do_set_cpus_allowed(p, cpu_possible_mask);
-	return cpumask_any(cpu_active_mask);
 }
 
 static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a09ac2b9a661..c9837b74ab96 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2195,7 +2195,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 	mutex_unlock(&callback_mutex);
 }
 
-int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 {
 	const struct cpuset *cs;
 	int cpu;
@@ -2219,22 +2219,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 	 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
 	 * set any mask even if it is not right from task_cs() pov,
 	 * the pending set_cpus_allowed_ptr() will fix things.
+	 *
+	 * select_fallback_rq() will fix things ups and set cpu_possible_mask
+	 * if required.
 	 */
-
-	cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
-	if (cpu >= nr_cpu_ids) {
-		/*
-		 * Either tsk->cpus_allowed is wrong (see above) or it
-		 * is actually empty. The latter case is only possible
-		 * if we are racing with remove_tasks_in_empty_cpuset().
-		 * Like above we can temporary set any mask and rely on
-		 * set_cpus_allowed_ptr() as synchronization point.
-		 */
-		do_set_cpus_allowed(tsk, cpu_possible_mask);
-		cpu = cpumask_any(cpu_active_mask);
-	}
-
-	return cpu;
 }
 
 void cpuset_init_current_mems_allowed(void)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e3ccc13c4caa..9c1629c90b2d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1263,29 +1263,59 @@ EXPORT_SYMBOL_GPL(kick_process);
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
-	int dest_cpu;
 	const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+	enum { cpuset, possible, fail } state = cpuset;
+	int dest_cpu;
 
 	/* Look for allowed, online CPU in same node. */
-	for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+	for_each_cpu_mask(dest_cpu, *nodemask) {
+		if (!cpu_online(dest_cpu))
+			continue;
+		if (!cpu_active(dest_cpu))
+			continue;
 		if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
 			return dest_cpu;
+	}
 
-	/* Any allowed, online CPU? */
-	dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask);
-	if (dest_cpu < nr_cpu_ids)
-		return dest_cpu;
+	for (;;) {
+		/* Any allowed, online CPU? */
+		for_each_cpu_mask(dest_cpu, *tsk_cpus_allowed(p)) {
+			if (!cpu_online(dest_cpu))
+				continue;
+			if (!cpu_active(dest_cpu))
+				continue;
+			goto out;
+		}
 
-	/* No more Mr. Nice Guy. */
-	dest_cpu = cpuset_cpus_allowed_fallback(p);
-	/*
-	 * Don't tell them about moving exiting tasks or
-	 * kernel threads (both mm NULL), since they never
-	 * leave kernel.
-	 */
-	if (p->mm && printk_ratelimit()) {
-		printk_sched("process %d (%s) no longer affine to cpu%d\n",
-				task_pid_nr(p), p->comm, cpu);
+		switch (state) {
+		case cpuset:
+			/* No more Mr. Nice Guy. */
+			cpuset_cpus_allowed_fallback(p);
+			state = possible;
+			break;
+
+		case possible:
+			do_set_cpus_allowed(p, cpu_possible_mask);
+			state = fail;
+			break;
+
+		case fail:
+			BUG();
+			break;
+		}
+	}
+
+out:
+	if (state != cpuset) {
+		/*
+		 * Don't tell them about moving exiting tasks or
+		 * kernel threads (both mm NULL), since they never
+		 * leave kernel.
+		 */
+		if (p->mm && printk_ratelimit()) {
+			printk_sched("process %d (%s) no longer affine to cpu%d\n",
+					task_pid_nr(p), p->comm, cpu);
+		}
 	}
 
 	return dest_cpu;
-- 
cgit v1.2.2


From 1b028abc779b67b699daff55e27d2432f8d92666 Mon Sep 17 00:00:00 2001
From: Michael J Wang <mjwang@broadcom.com>
Date: Mon, 19 Mar 2012 22:26:19 +0000
Subject: sched/rt: Improve pick_next_highest_task_rt()

Avoid extra work by continuing on to the next rt_rq if the highest
prio task in current rt_rq is the same priority as our candidate
task.

More detailed explanation:  if next is not NULL, then we have found a
candidate task, and its priority is next->prio.  Now we are looking
for an even higher priority task in the other rt_rq's.  idx is the
highest priority in the current candidate rt_rq.  In the current 3.3
code, if idx is equal to next->prio, we would start scanning the tasks
in that rt_rq and replace the current candidate task with a task from
that rt_rq.  But the new task would only have a priority that is equal
to our previous candidate task, so we have not advanced our goal of
finding a higher prio task.  So we should avoid the extra work by
continuing on to the next rt_rq if idx is equal to next->prio.

Signed-off-by: Michael J Wang <mjwang@broadcom.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Yong Zhang <yong.zhang0@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/2EF88150C0EF2C43A218742ED384C1BC0FC83D6B@IRVEXCHMB08.corp.ad.broadcom.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/rt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b60dad720173..44af55e6d5d0 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1428,7 +1428,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
 next_idx:
 		if (idx >= MAX_RT_PRIO)
 			continue;
-		if (next && next->prio < idx)
+		if (next && next->prio <= idx)
 			continue;
 		list_for_each_entry(rt_se, array->queue + idx, run_list) {
 			struct task_struct *p;
-- 
cgit v1.2.2


From 160594e99dbbb0a5600ad922c630952c7c1c14bf Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 28 Mar 2012 13:46:09 +0300
Subject: cpusets: Remove an unused variable

We don't use "cpu" any more after 2baab4e904 "sched: Fix
select_fallback_rq() vs cpu_active/cpu_online".

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Paul Menage <paul@paulmenage.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120328104608.GD29022@elgon.mountain
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/cpuset.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c9837b74ab96..4ef4d7ecb9fb 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2198,7 +2198,6 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 {
 	const struct cpuset *cs;
-	int cpu;
 
 	rcu_read_lock();
 	cs = task_cs(tsk);
-- 
cgit v1.2.2