From ff081e05bfba3461119cd280201d163b6858eda2 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 6 Jul 2012 22:03:42 +0100 Subject: ARM: 7457/1: smp: Fix suspicious RCU originating from cpu_die() While running hotplug tests I ran into this RCU splat =============================== [ INFO: suspicious RCU usage. ] 3.4.0 #3275 Tainted: G W ------------------------------- include/linux/rcupdate.h:729 rcu_read_lock() used illegally while idle! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! 4 locks held by swapper/2/0: #0: ((cpu_died).wait.lock){......}, at: [] complete+0x1c/0x5c #1: (&p->pi_lock){-.-.-.}, at: [] try_to_wake_up+0x2c/0x388 #2: (&rq->lock){-.-.-.}, at: [] try_to_wake_up+0x130/0x388 #3: (rcu_read_lock){.+.+..}, at: [] cpuacct_charge+0x28/0x1f4 stack backtrace: [] (unwind_backtrace+0x0/0x12c) from [] (cpuacct_charge+0x94/0x1f4) [] (cpuacct_charge+0x94/0x1f4) from [] (update_curr+0x24c/0x2c8) [] (update_curr+0x24c/0x2c8) from [] (enqueue_task_fair+0x50/0x194) [] (enqueue_task_fair+0x50/0x194) from [] (enqueue_task+0x30/0x34) [] (enqueue_task+0x30/0x34) from [] (ttwu_activate+0x14/0x38) [] (ttwu_activate+0x14/0x38) from [] (try_to_wake_up+0x178/0x388) [] (try_to_wake_up+0x178/0x388) from [] (__wake_up_common+0x34/0x78) [] (__wake_up_common+0x34/0x78) from [] (complete+0x48/0x5c) [] (complete+0x48/0x5c) from [] (cpu_die+0x2c/0x58) [] (cpu_die+0x2c/0x58) from [] (cpu_idle+0x64/0xfc) [] (cpu_idle+0x64/0xfc) from [<80208160>] (0x80208160) When a cpu is marked offline during its idle thread it calls cpu_die() during an RCU idle period. cpu_die() calls complete() to notify the killing process that the cpu has died. complete() calls into the scheduler code and eventually grabs an RCU read lock in cpuacct_charge(). Mark complete() as RCU_NONIDLE so that RCU pays attention to this CPU for the duration of the complete() function even though it's in idle. Suggested-by: "Paul E. McKenney" Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 2c7217d971db..aea74f5bc34a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -179,7 +179,7 @@ void __ref cpu_die(void) mb(); /* Tell __cpu_die() that this CPU is now safe to dispose of */ - complete(&cpu_died); + RCU_NONIDLE(complete(&cpu_died)); /* * actual CPU shutdown procedure is at least platform (if not -- cgit v1.2.2 From 02df19b4227e5b799e4642e88b568f9474fa78d0 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 15 Jun 2012 10:23:32 +0100 Subject: ARM: 7424/1: update die handler from x86 Robustify ARM's die() handling with improvements from x86: - Fix for a deadlock (before panic in the case of panic_on_oops) if we oops under a spinlock which is also used from interrupt handler, since the old code was unconditionally enabling interrupts. - Usage of arch spinlock so lockdep etc doesn't get involved while we're trying to dump out oopses. - Deadlock prevention in the unlikely event that die() recurses. The changes all touch the same few lines of code, so they're done together in one patch. 
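In outline, the x86-style locking being adopted here reduces to the following condensed sketch (illustrative only; the real code is in the traps.c diff below, which additionally keeps the kexec_should_crash() and in_interrupt() panic handling):

/*
 * Condensed sketch of the die() locking pattern, using the same helpers
 * as arch/arm/kernel/traps.c in the diff below.
 */
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;

static unsigned long oops_begin(void)
{
        unsigned long flags;
        int cpu;

        oops_enter();

        /* Keep IRQs off for the whole oops so an IRQ handler that wants
         * a lock we oopsed under cannot deadlock us before we panic. */
        raw_local_irq_save(flags);
        cpu = smp_processor_id();
        if (!arch_spin_trylock(&die_lock)) {
                if (cpu != die_owner)
                        arch_spin_lock(&die_lock);      /* another CPU is oopsing */
                /* else: recursive oops on this CPU, press on regardless */
        }
        die_nest_count++;
        die_owner = cpu;
        console_verbose();
        bust_spinlocks(1);
        return flags;
}

static void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
        bust_spinlocks(0);
        die_owner = -1;
        add_taint(TAINT_DIE);
        die_nest_count--;
        if (!die_nest_count)
                arch_spin_unlock(&die_lock);    /* only the outermost oops unlocks */
        raw_local_irq_restore(flags);
        oops_exit();
        if (panic_on_oops)
                panic("Fatal exception");
        if (signr)
                do_exit(signr);
}

Because only an arch spinlock is taken, lockdep and the rest of the debugging machinery stay out of the way while the oops is being printed.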
Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/kernel/traps.c | 78 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 23 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 4928d89758f4..2df8715c36c0 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -233,9 +233,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #define S_ISA " ARM" #endif -static int __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs) +static int __die(const char *str, int err, struct pt_regs *regs) { - struct task_struct *tsk = thread->task; + struct task_struct *tsk = current; static int die_counter; int ret; @@ -245,12 +245,12 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt /* trap and error numbers are mostly meaningless on ARM */ ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV); if (ret == NOTIFY_STOP) - return ret; + return 1; print_modules(); __show_regs(regs); printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n", - TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp, @@ -259,45 +259,77 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt dump_instr(KERN_EMERG, regs); } - return ret; + return 0; } -static DEFINE_RAW_SPINLOCK(die_lock); +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; -/* - * This function is protected against re-entrancy. - */ -void die(const char *str, struct pt_regs *regs, int err) +static unsigned long oops_begin(void) { - struct thread_info *thread = current_thread_info(); - int ret; - enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE; + int cpu; + unsigned long flags; oops_enter(); - raw_spin_lock_irq(&die_lock); + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + arch_spin_lock(&die_lock); + } + die_nest_count++; + die_owner = cpu; console_verbose(); bust_spinlocks(1); - if (!user_mode(regs)) - bug_type = report_bug(regs->ARM_pc, regs); - if (bug_type != BUG_TRAP_TYPE_NONE) - str = "Oops - BUG"; - ret = __die(str, err, thread, regs); + return flags; +} - if (regs && kexec_should_crash(thread->task)) +static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) +{ + if (regs && kexec_should_crash(current)) crash_kexec(regs); bust_spinlocks(0); + die_owner = -1; add_taint(TAINT_DIE); - raw_spin_unlock_irq(&die_lock); + die_nest_count--; + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); oops_exit(); if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - if (ret != NOTIFY_STOP) - do_exit(SIGSEGV); + if (signr) + do_exit(signr); +} + +/* + * This function is protected against re-entrancy. 
+ */ +void die(const char *str, struct pt_regs *regs, int err) +{ + enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE; + unsigned long flags = oops_begin(); + int sig = SIGSEGV; + + if (!user_mode(regs)) + bug_type = report_bug(regs->ARM_pc, regs); + if (bug_type != BUG_TRAP_TYPE_NONE) + str = "Oops - BUG"; + + if (__die(str, err, regs)) + sig = 0; + + oops_end(flags, regs, sig); } void arm_notify_die(const char *str, struct pt_regs *regs, -- cgit v1.2.2 From 9fa16b7755967b1b4dc704de71a3d639d13e21fc Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 4 Jul 2012 04:58:12 +0100 Subject: ARM: 7439/1: head.S: simplify initial page table mapping Let's map the initial RAM up to the end of the kernel .bss instead of the strict kernel image area. This simplifies the code as the kernel image only needs to be handled specially in the XIP case. That covers the legacy ATAG location as well. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/head.S | 59 ++++++++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 36 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 835898e7d704..3db960e20cb8 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -55,14 +55,6 @@ add \rd, \phys, #TEXT_OFFSET - PG_DIR_SIZE .endm -#ifdef CONFIG_XIP_KERNEL -#define KERNEL_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) -#define KERNEL_END _edata_loc -#else -#define KERNEL_START KERNEL_RAM_VADDR -#define KERNEL_END _end -#endif - /* * Kernel startup entry point. * --------------------------- @@ -218,51 +210,46 @@ __create_page_tables: blo 1b /* - * Now setup the pagetables for our kernel direct - * mapped region. + * Map our RAM from the start to the end of the kernel .bss section. */ - mov r3, pc - mov r3, r3, lsr #SECTION_SHIFT - orr r3, r7, r3, lsl #SECTION_SHIFT - add r0, r4, #(KERNEL_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0, #((KERNEL_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]! - ldr r6, =(KERNEL_END - 1) - add r0, r0, #1 << PMD_ORDER + add r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER) + ldr r6, =(_end - 1) + orr r3, r8, r7 add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) -1: cmp r0, r6 +1: str r3, [r0], #1 << PMD_ORDER add r3, r3, #1 << SECTION_SHIFT - strls r3, [r0], #1 << PMD_ORDER + cmp r0, r6 bls 1b #ifdef CONFIG_XIP_KERNEL /* - * Map some ram to cover our .data and .bss areas. + * Map the kernel image separately as it is not located in RAM. */ - add r3, r8, #TEXT_OFFSET - orr r3, r3, r7 - add r0, r4, #(KERNEL_RAM_VADDR & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> (SECTION_SHIFT - PMD_ORDER)]! - ldr r6, =(_end - 1) - add r0, r0, #4 +#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) + mov r3, pc + mov r3, r3, lsr #SECTION_SHIFT + orr r3, r7, r3, lsl #SECTION_SHIFT + add r0, r4, #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) + str r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]! + ldr r6, =(_edata_loc - 1) + add r0, r0, #1 << PMD_ORDER add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) 1: cmp r0, r6 - add r3, r3, #1 << 20 - strls r3, [r0], #4 + add r3, r3, #1 << SECTION_SHIFT + strls r3, [r0], #1 << PMD_ORDER bls 1b #endif /* - * Then map boot params address in r2 or the first 1MB (2MB with LPAE) - * of ram if boot params address is not specified. + * Then map boot params address in r2 if specified. 
*/ mov r0, r2, lsr #SECTION_SHIFT movs r0, r0, lsl #SECTION_SHIFT - moveq r0, r8 - sub r3, r0, r8 - add r3, r3, #PAGE_OFFSET - add r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) - orr r6, r7, r0 - str r6, [r3] + subne r3, r0, r8 + addne r3, r3, #PAGE_OFFSET + addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) + orrne r6, r7, r0 + strne r6, [r3] #ifdef CONFIG_DEBUG_LL #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING) -- cgit v1.2.2 From 27a5569dc66ecce06cb532542ddcd0b6da8783f6 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 6 Jul 2012 11:06:49 +0100 Subject: ARM: 7444/1: kernel: add arch-timer C3STOP feature When a CPU is shutdown its architected timer comparators registers are lost. Within CPU idle, before processors enter shutdown they enter clock events broadcast mode through the clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, cpuid); function where the local timers are emulated by a global always-on timer. On CPU resume, the per-CPU tick device normal mode is restored by exiting broadcast mode through clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, cpuid); In order for this mechanism to function, architected timers should add to their feature C3STOP, which means that they are not able to function when the CPU is in off-mode. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/kernel/arch_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index dd58035621f7..df44c8cf9e2e 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -137,7 +137,7 @@ static int __cpuinit arch_timer_setup(struct clock_event_device *clk) /* Be safe... */ arch_timer_disable(); - clk->features = CLOCK_EVT_FEAT_ONESHOT; + clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP; clk->name = "arch_sys_timer"; clk->rating = 450; clk->set_mode = arch_timer_set_mode; -- cgit v1.2.2 From 4295b898f5a5c7e62ae68e7a4ecc4b414622ffe6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:45:00 +0100 Subject: ARM: 7448/1: perf: remove arm_perf_pmu_ids global enumeration In order to provide PMU name strings compatible with the OProfile user ABI, an enumeration of all PMUs is currently used by perf to identify each PMU uniquely. Unfortunately, this does not scale well in the presence of multiple PMUs and creates a single, global namespace across all PMUs in the system. This patch removes the enumeration and instead uses the name string for the PMU to map onto the OProfile variant. perf_pmu_name is implemented for CPU PMUs, which is all that OProfile cares about anyway. Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 15 ++++++--------- arch/arm/kernel/perf_event_v6.c | 2 -- arch/arm/kernel/perf_event_v7.c | 5 ----- arch/arm/kernel/perf_event_xscale.c | 2 -- 4 files changed, 6 insertions(+), 18 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 186c8cb982c5..df85eda3add3 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -47,17 +47,14 @@ static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); /* Set at runtime when we know what CPU type we are. 
*/ static struct arm_pmu *cpu_pmu; -enum arm_perf_pmu_ids -armpmu_get_pmu_id(void) +const char *perf_pmu_name(void) { - int id = -ENODEV; - - if (cpu_pmu != NULL) - id = cpu_pmu->id; + if (!cpu_pmu) + return NULL; - return id; + return cpu_pmu->pmu.name; } -EXPORT_SYMBOL_GPL(armpmu_get_pmu_id); +EXPORT_SYMBOL_GPL(perf_pmu_name); int perf_num_counters(void) { @@ -760,7 +757,7 @@ init_hw_perf_events(void) cpu_pmu->name, cpu_pmu->num_events); cpu_pmu_init(cpu_pmu); register_cpu_notifier(&pmu_cpu_notifier); - armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW); + armpmu_register(cpu_pmu, cpu_pmu->name, PERF_TYPE_RAW); } else { pr_info("no hardware support available\n"); } diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index ab627a740fa3..c90fcb2b6967 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -650,7 +650,6 @@ static int armv6_map_event(struct perf_event *event) } static struct arm_pmu armv6pmu = { - .id = ARM_PERF_PMU_ID_V6, .name = "v6", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, @@ -685,7 +684,6 @@ static int armv6mpcore_map_event(struct perf_event *event) } static struct arm_pmu armv6mpcore_pmu = { - .id = ARM_PERF_PMU_ID_V6MP, .name = "v6mpcore", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index d3c536068162..f04070bd2183 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1258,7 +1258,6 @@ static u32 __init armv7_read_num_pmnc_events(void) static struct arm_pmu *__init armv7_a8_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA8; armv7pmu.name = "ARMv7 Cortex-A8"; armv7pmu.map_event = armv7_a8_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1267,7 +1266,6 @@ static struct arm_pmu *__init armv7_a8_pmu_init(void) static struct arm_pmu *__init armv7_a9_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA9; armv7pmu.name = "ARMv7 Cortex-A9"; armv7pmu.map_event = armv7_a9_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1276,7 +1274,6 @@ static struct arm_pmu *__init armv7_a9_pmu_init(void) static struct arm_pmu *__init armv7_a5_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA5; armv7pmu.name = "ARMv7 Cortex-A5"; armv7pmu.map_event = armv7_a5_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1285,7 +1282,6 @@ static struct arm_pmu *__init armv7_a5_pmu_init(void) static struct arm_pmu *__init armv7_a15_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA15; armv7pmu.name = "ARMv7 Cortex-A15"; armv7pmu.map_event = armv7_a15_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1295,7 +1291,6 @@ static struct arm_pmu *__init armv7_a15_pmu_init(void) static struct arm_pmu *__init armv7_a7_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA7; armv7pmu.name = "ARMv7 Cortex-A7"; armv7pmu.map_event = armv7_a7_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index e34e7254e652..f759fe0bab63 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -435,7 +435,6 @@ static int xscale_map_event(struct perf_event *event) } static struct arm_pmu xscale1pmu = { - .id = ARM_PERF_PMU_ID_XSCALE1, .name = "xscale1", .handle_irq = xscale1pmu_handle_irq, .enable = xscale1pmu_enable_event, @@ -803,7 +802,6 @@ xscale2pmu_write_counter(int counter, u32 val) } static struct arm_pmu 
xscale2pmu = { - .id = ARM_PERF_PMU_ID_XSCALE2, .name = "xscale2", .handle_irq = xscale2pmu_handle_irq, .enable = xscale2pmu_enable_event, -- cgit v1.2.2 From 8c56cc8be5b38e3684eba96dc9b3f7ca7e495755 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:45:39 +0100 Subject: ARM: 7449/1: use generic strnlen_user and strncpy_from_user functions This patch implements the word-at-a-time interface for ARM using the same algorithm as x86. We use the fls macro from ARMv5 onwards, where we have a clz instruction available which saves us a mov instruction when targetting Thumb-2. For older CPUs, we use the magic 0x0ff0001 constant. Big-endian configurations make use of the implementation from asm-generic. With this implemented, we can replace our byte-at-a-time strnlen_user and strncpy_from_user functions with the optimised generic versions. Reviewed-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/armksyms.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index b57c75e0b01f..c3dff6abc89d 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -87,10 +87,6 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(__memzero); - /* user mem (segment) */ -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); - #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); -- cgit v1.2.2 From 923df96b9f31b7d08d8438ff9677326d9537accf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:46:45 +0100 Subject: ARM: 7451/1: arch timer: implement read_current_timer and get_cycles This patch implements read_current_timer using the architected timers when they are selected via CONFIG_ARM_ARCH_TIMER. If they are detected not to be usable at runtime, we return -ENXIO to the caller. Furthermore, if read_current_timer is exported then we can implement get_cycles in terms of it for use as both an entropy source and for implementing __udelay and friends. Tested-by: Shinya Kuribayashi Reviewed-by: Stephen Boyd Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/arch_timer.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index dd58035621f7..dbbeec4f06e2 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -223,6 +223,14 @@ static cycle_t arch_counter_read(struct clocksource *cs) return arch_counter_get_cntpct(); } +int read_current_timer(unsigned long *timer_val) +{ + if (!arch_timer_rate) + return -ENXIO; + *timer_val = arch_counter_get_cntpct(); + return 0; +} + static struct clocksource clocksource_counter = { .name = "arch_sys_counter", .rating = 400, -- cgit v1.2.2 From d0a533b18235d36206b9b422efadb7cee444dfdb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:47:17 +0100 Subject: ARM: 7452/1: delay: allow timer-based delay implementation to be selected This patch allows a timer-based delay implementation to be selected by switching the delay routines over to use get_cycles, which is implemented in terms of read_current_timer. This further allows us to skip the loop calibration and have a consistent delay function in the face of core frequency scaling. To avoid the pain of dealing with memory-mapped counters, this implementation uses the co-processor interface to the architected timers when they are available. 
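The idea reduces to busy-waiting on get_cycles(); a minimal sketch of such a timer-based delay (not the exact arch/arm/lib/delay.c code, and the function name is illustrative):

/* Busy-wait for 'cycles' ticks of the architected counter. */
static void timer_delay(unsigned long cycles)
{
        cycles_t start = get_cycles(); /* CNTPCT via read_current_timer() */

        while ((get_cycles() - start) < cycles)
                cpu_relax();
}

Because the counter runs at a fixed rate independent of the CPU clock, converting microseconds to counter ticks needs no boot-time calibration and remains correct across cpufreq changes.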
The previous loop-based implementation is kept around for CPUs without the architected timers and we retain both the maximum delay (2ms) and the corresponding conversion factors for determining the number of loops required for a given interval. Since the indirection of the timer routines will only work when called from C, the sa1100 sleep routines are modified to branch to the loop-based delay functions directly. Tested-by: Shinya Kuribayashi Reviewed-by: Stephen Boyd Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/arch_timer.c | 3 +++ arch/arm/kernel/armksyms.c | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index dbbeec4f06e2..675cee09c014 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -32,6 +32,8 @@ static int arch_timer_ppi2; static struct clock_event_device __percpu **arch_timer_evt; +extern void init_current_timer_delay(unsigned long freq); + /* * Architected system timer support. */ @@ -304,6 +306,7 @@ static int __init arch_timer_register(void) if (err) goto out_free_irq; + init_current_timer_delay(arch_timer_rate); return 0; out_free_irq: diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index b57c75e0b01f..71962284d288 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -49,8 +49,7 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); /* platform dependent support */ -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(arm_delay_ops); /* networking */ EXPORT_SYMBOL(csum_partial); -- cgit v1.2.2 From 64284a9f8a91b2f1af577f78fabe05d8072cb6e6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:48:50 +0100 Subject: ARM: 7454/1: entry: don't bother with syscall tracing on ret_from_fork path ret_from_fork is setup for a freshly spawned child task via copy_thread, called from copy_process. The latter function clears TIF_SYSCALL_TRACE and also resets the child task's audit_context to NULL, meaning that there is little point invoking the system call tracing routines. Furthermore, getting hold of the syscall number is a complete pain and it looks like the current code doesn't even bother. This patch removes the syscall tracing checks from ret_from_fork. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 4afed88d250a..10911c93fbf1 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -95,13 +95,7 @@ ENDPROC(ret_to_user) ENTRY(ret_from_fork) bl schedule_tail get_thread_info tsk - ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing mov why, #1 - tst r1, #_TIF_SYSCALL_WORK @ are we tracing syscalls? - beq ret_slow_syscall - mov r1, sp - mov r0, #1 @ trace exit [IP = 1] - bl syscall_trace b ret_slow_syscall ENDPROC(ret_from_fork) -- cgit v1.2.2 From 5125430cccc41f67bfe024394a302901034f6d39 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:49:27 +0100 Subject: ARM: 7455/1: audit: move syscall auditing until after ptrace SIGTRAP handling When auditing system calls on ARM, the audit code is called before notifying the parent process in the case that the current task is being ptraced. 
At this point, the parent (debugger) may choose to change the system call being issued via the SET_SYSCALL ptrace request, causing the wrong system call to be reported to the audit tools. This patch moves the audit calls after the ptrace SIGTRAP handling code in the syscall tracing implementation. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/ptrace.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 14e38261cd31..592a39d0ef31 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -911,14 +911,8 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) { unsigned long ip; - if (why) - audit_syscall_exit(regs); - else - audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, - regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return scno; + goto out_no_trace; current_thread_info()->syscall = scno; @@ -935,6 +929,13 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) current_thread_info()->syscall = -1; regs->ARM_ip = ip; + scno = current_thread_info()->syscall; - return current_thread_info()->syscall; +out_no_trace: + if (why) + audit_syscall_exit(regs); + else + audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, + regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); + return scno; } -- cgit v1.2.2 From ad722541147e6e517a2077e3d944105e7bc4fa8e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:50:14 +0100 Subject: ARM: 7456/1: ptrace: provide separate functions for tracing syscall {entry,exit} The syscall_trace on ARM takes a `why' parameter to indicate whether or not we are entering or exiting a system call. This can be confusing for people looking at the code since (a) it conflicts with the why register alias in the entry assembly code and (b) it is not immediately clear what it represents. This patch splits up the syscall_trace function into separate wrappers for syscall entry and exit, allowing the low-level syscall handling code to branch to the appropriate function. Reported-by: Al Viro Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 14 ++++++-------- arch/arm/kernel/ptrace.c | 37 +++++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 20 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 10911c93fbf1..49d9f9305247 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -442,10 +442,9 @@ ENDPROC(vector_swi) * context switches, and waiting for our parent to respond. */ __sys_trace: - mov r2, scno - add r1, sp, #S_OFF - mov r0, #0 @ trace entry [IP = 0] - bl syscall_trace + mov r1, scno + add r0, sp, #S_OFF + bl syscall_trace_enter adr lr, BSYM(__sys_trace_return) @ return address mov scno, r0 @ syscall number (possibly new) @@ -457,10 +456,9 @@ __sys_trace: __sys_trace_return: str r0, [sp, #S_R0 + S_OFF]! 
@ save returned r0 - mov r2, scno - mov r1, sp - mov r0, #1 @ trace exit [IP = 1] - bl syscall_trace + mov r1, scno + mov r0, sp + bl syscall_trace_exit b ret_slow_syscall .align 5 diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 592a39d0ef31..dab711e6e1ca 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -907,12 +907,18 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; + +static int ptrace_syscall_trace(struct pt_regs *regs, int scno, + enum ptrace_syscall_dir dir) { unsigned long ip; if (!test_thread_flag(TIF_SYSCALL_TRACE)) - goto out_no_trace; + return scno; current_thread_info()->syscall = scno; @@ -921,21 +927,28 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) * IP = 0 -> entry, =1 -> exit */ ip = regs->ARM_ip; - regs->ARM_ip = why; + regs->ARM_ip = dir; - if (why) + if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) current_thread_info()->syscall = -1; regs->ARM_ip = ip; - scno = current_thread_info()->syscall; + return current_thread_info()->syscall; +} -out_no_trace: - if (why) - audit_syscall_exit(regs); - else - audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, - regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); - return scno; +asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) +{ + int ret = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_ENTER); + audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, regs->ARM_r1, + regs->ARM_r2, regs->ARM_r3); + return ret; +} + +asmlinkage int syscall_trace_exit(struct pt_regs *regs, int scno) +{ + int ret = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_EXIT); + audit_syscall_exit(regs); + return ret; } -- cgit v1.2.2 From 130d9aabf997bd8449ff4e877fe3c42df066805e Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 10 Jul 2012 14:08:40 +0100 Subject: ARM: 7461/1: topology: Add arch_scale_freq_power function Add infrastructure to be able to modify the cpu_power of each core Signed-off-by: Vincent Guittot Reviewed-by: Namhyung Kim Acked-by: Peter Zijlstra Signed-off-by: Russell King --- arch/arm/kernel/topology.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 8200deaa14f6..51f23b3ed0a6 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -22,6 +22,37 @@ #include #include +/* + * cpu power scale management + */ + +/* + * cpu power table + * This per cpu data structure describes the relative capacity of each core. + * On a heteregenous system, cores don't have the same computation capacity + * and we reflect that difference in the cpu_power field so the scheduler can + * take this difference into account during load balance. A per cpu structure + * is preferred because each CPU updates its own cpu_power field during the + * load balance except for idle cores. One idle core is selected to run the + * rebalance_domains for all idle cores and the cpu_power can be updated + * during this sequence. 
+ */ +static DEFINE_PER_CPU(unsigned long, cpu_scale); + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +static void set_power_scale(unsigned int cpu, unsigned long power) +{ + per_cpu(cpu_scale, cpu) = power; +} + +/* + * cpu topology management + */ + #define MPIDR_SMP_BITMASK (0x3 << 30) #define MPIDR_SMP_VALUE (0x2 << 30) @@ -41,6 +72,9 @@ #define MPIDR_LEVEL2_MASK 0xFF #define MPIDR_LEVEL2_SHIFT 16 +/* + * cpu topology table + */ struct cputopo_arm cpu_topology[NR_CPUS]; const struct cpumask *cpu_coregroup_mask(int cpu) @@ -134,7 +168,7 @@ void init_cpu_topology(void) { unsigned int cpu; - /* init core mask */ + /* init core mask and power*/ for_each_possible_cpu(cpu) { struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]); @@ -143,6 +177,8 @@ void init_cpu_topology(void) cpu_topo->socket_id = -1; cpumask_clear(&cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); + + set_power_scale(cpu, SCHED_POWER_SCALE); } smp_wmb(); } -- cgit v1.2.2 From cb75dacb39494164e6b1f7aa747fb639bf18584c Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 10 Jul 2012 14:11:11 +0100 Subject: ARM: 7462/1: topology: factorize the update of sibling masks This factorization has also been proposed in another patch that has not been merged yet: http://lists.infradead.org/pipermail/linux-arm-kernel/2012-January/080873.html So, this patch could be dropped depending of the state of the other one. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Vincent Guittot Reviewed-by: Namhyung Kim Acked-by: Peter Zijlstra Signed-off-by: Russell King --- arch/arm/kernel/topology.c | 48 ++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 21 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 51f23b3ed0a6..eb5fc8132c02 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -82,6 +82,32 @@ const struct cpumask *cpu_coregroup_mask(int cpu) return &cpu_topology[cpu].core_sibling; } +void update_siblings_masks(unsigned int cpuid) +{ + struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->socket_id != cpu_topo->socket_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } + smp_wmb(); +} + /* * store_cpu_topology is called at boot when only one cpu is running * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, @@ -91,7 +117,6 @@ void store_cpu_topology(unsigned int cpuid) { struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid]; unsigned int mpidr; - unsigned int cpu; /* If the cpu topology has been already set, just return */ if (cpuid_topo->core_id != -1) @@ -133,26 +158,7 @@ void store_cpu_topology(unsigned int cpuid) cpuid_topo->socket_id = -1; } - /* update core and thread sibling masks */ - for_each_possible_cpu(cpu) { - struct cputopo_arm *cpu_topo = &cpu_topology[cpu]; - - if (cpuid_topo->socket_id == cpu_topo->socket_id) { - cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, - &cpuid_topo->core_sibling); - - if (cpuid_topo->core_id == 
cpu_topo->core_id) { - cpumask_set_cpu(cpuid, - &cpu_topo->thread_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, - &cpuid_topo->thread_sibling); - } - } - } - smp_wmb(); + update_siblings_masks(cpuid); printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, -- cgit v1.2.2 From 339ca09d7adac80eda8d097ab473c6c23ee86b17 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 10 Jul 2012 14:13:12 +0100 Subject: ARM: 7463/1: topology: Update cpu_power according to DT information Use cpu compatibility field and clock-frequency field of DT to estimate the capacity of each core of the system and to update the cpu_power field accordingly. This patch enables to put more running tasks on big cores than on LITTLE ones. But this patch doesn't ensure that long running tasks will run on big cores and short ones on LITTLE cores. Signed-off-by: Vincent Guittot Reviewed-by: Namhyung Kim Acked-by: Peter Zijlstra Signed-off-by: Russell King --- arch/arm/kernel/topology.c | 153 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index eb5fc8132c02..198b08456e90 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -17,7 +17,9 @@ #include #include #include +#include #include +#include #include #include @@ -49,6 +51,152 @@ static void set_power_scale(unsigned int cpu, unsigned long power) per_cpu(cpu_scale, cpu) = power; } +#ifdef CONFIG_OF +struct cpu_efficiency { + const char *compatible; + unsigned long efficiency; +}; + +/* + * Table of relative efficiency of each processors + * The efficiency value must fit in 20bit and the final + * cpu_scale value must be in the range + * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * in order to return at most 1 when DIV_ROUND_CLOSEST + * is used to compute the capacity of a CPU. + * Processors that are not defined in the table, + * use the default SCHED_POWER_SCALE value for cpu_scale. + */ +struct cpu_efficiency table_efficiency[] = { + {"arm,cortex-a15", 3891}, + {"arm,cortex-a7", 2048}, + {NULL, }, +}; + +struct cpu_capacity { + unsigned long hwid; + unsigned long capacity; +}; + +struct cpu_capacity *cpu_capacity; + +unsigned long middle_capacity = 1; + +/* + * Iterate all CPUs' descriptor in DT and compute the efficiency + * (as per table_efficiency). Also calculate a middle efficiency + * as close as possible to (max{eff_i} - min{eff_i}) / 2 + * This is later used to scale the cpu_power field such that an + * 'average' CPU is of middle power. Also see the comments near + * table_efficiency[] and update_cpu_power(). 
+ */ +static void __init parse_dt_topology(void) +{ + struct cpu_efficiency *cpu_eff; + struct device_node *cn = NULL; + unsigned long min_capacity = (unsigned long)(-1); + unsigned long max_capacity = 0; + unsigned long capacity = 0; + int alloc_size, cpu = 0; + + alloc_size = nr_cpu_ids * sizeof(struct cpu_capacity); + cpu_capacity = (struct cpu_capacity *)kzalloc(alloc_size, GFP_NOWAIT); + + while ((cn = of_find_node_by_type(cn, "cpu"))) { + const u32 *rate, *reg; + int len; + + if (cpu >= num_possible_cpus()) + break; + + for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) + if (of_device_is_compatible(cn, cpu_eff->compatible)) + break; + + if (cpu_eff->compatible == NULL) + continue; + + rate = of_get_property(cn, "clock-frequency", &len); + if (!rate || len != 4) { + pr_err("%s missing clock-frequency property\n", + cn->full_name); + continue; + } + + reg = of_get_property(cn, "reg", &len); + if (!reg || len != 4) { + pr_err("%s missing reg property\n", cn->full_name); + continue; + } + + capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency; + + /* Save min capacity of the system */ + if (capacity < min_capacity) + min_capacity = capacity; + + /* Save max capacity of the system */ + if (capacity > max_capacity) + max_capacity = capacity; + + cpu_capacity[cpu].capacity = capacity; + cpu_capacity[cpu++].hwid = be32_to_cpup(reg); + } + + if (cpu < num_possible_cpus()) + cpu_capacity[cpu].hwid = (unsigned long)(-1); + + /* If min and max capacities are equals, we bypass the update of the + * cpu_scale because all CPUs have the same capacity. Otherwise, we + * compute a middle_capacity factor that will ensure that the capacity + * of an 'average' CPU of the system will be as close as possible to + * SCHED_POWER_SCALE, which is the default value, but with the + * constraint explained near table_efficiency[]. + */ + if (min_capacity == max_capacity) + cpu_capacity[0].hwid = (unsigned long)(-1); + else if (4*max_capacity < (3*(max_capacity + min_capacity))) + middle_capacity = (min_capacity + max_capacity) + >> (SCHED_POWER_SHIFT+1); + else + middle_capacity = ((max_capacity / 3) + >> (SCHED_POWER_SHIFT-1)) + 1; + +} + +/* + * Look for a customed capacity of a CPU in the cpu_capacity table during the + * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the + * function returns directly for SMP system. + */ +void update_cpu_power(unsigned int cpu, unsigned long hwid) +{ + unsigned int idx = 0; + + /* look for the cpu's hwid in the cpu capacity table */ + for (idx = 0; idx < num_possible_cpus(); idx++) { + if (cpu_capacity[idx].hwid == hwid) + break; + + if (cpu_capacity[idx].hwid == -1) + return; + } + + if (idx == num_possible_cpus()) + return; + + set_power_scale(cpu, cpu_capacity[idx].capacity / middle_capacity); + + printk(KERN_INFO "CPU%u: update cpu_power %lu\n", + cpu, arch_scale_freq_power(NULL, cpu)); +} + +#else +static inline void parse_dt_topology(void) {} +static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {} +#endif + + /* * cpu topology management */ @@ -62,6 +210,7 @@ static void set_power_scale(unsigned int cpu, unsigned long power) * These masks reflect the current use of the affinity levels. 
* The affinity level can be up to 16 bits according to ARM ARM */ +#define MPIDR_HWID_BITMASK 0xFFFFFF #define MPIDR_LEVEL0_MASK 0x3 #define MPIDR_LEVEL0_SHIFT 0 @@ -160,6 +309,8 @@ void store_cpu_topology(unsigned int cpuid) update_siblings_masks(cpuid); + update_cpu_power(cpuid, mpidr & MPIDR_HWID_BITMASK); + printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, cpu_topology[cpuid].core_id, @@ -187,4 +338,6 @@ void init_cpu_topology(void) set_power_scale(cpu, SCHED_POWER_SCALE); } smp_wmb(); + + parse_dt_topology(); } -- cgit v1.2.2
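To make the cpu_power scaling in the last patch concrete, here is a standalone userspace reproduction of the arithmetic from parse_dt_topology() and update_cpu_power() for a hypothetical system with one Cortex-A15 and one Cortex-A7, both described in DT with a clock-frequency of 1 GHz (the CPUs and frequencies are invented for illustration):

#include <stdio.h>

#define SCHED_POWER_SHIFT       10      /* SCHED_POWER_SCALE == 1024 */

int main(void)
{
        /* capacity = (clock-frequency >> 20) * efficiency, as in the patch */
        unsigned long a15 = (1000000000UL >> 20) * 3891;  /* 953 * 3891 = 3708123 */
        unsigned long a7  = (1000000000UL >> 20) * 2048;  /* 953 * 2048 = 1951744 */
        unsigned long min = a7, max = a15, middle;

        /* 4*max < 3*(max + min) here, so the first branch is taken */
        if (4 * max < 3 * (max + min))
                middle = (min + max) >> (SCHED_POWER_SHIFT + 1);
        else
                middle = ((max / 3) >> (SCHED_POWER_SHIFT - 1)) + 1;

        printf("middle=%lu A15 cpu_power=%lu A7 cpu_power=%lu\n",
               middle, a15 / middle, a7 / middle);
        /* prints: middle=2763 A15 cpu_power=1342 A7 cpu_power=706 */
        return 0;
}

On such a system the A15 ends up with cpu_power 1342 and the A7 with 706, relative to the default SCHED_POWER_SCALE of 1024, which is what lets the scheduler put proportionally more load on the big core.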