From 9c8b39077b0a5b0990bb707fcd7b5913d7b322b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Nov 2011 19:56:05 -0500 Subject: powerpc: Fix build breakage in jump_label.c Should do what other architectures do and wrap all that code into the appropriate ifdef Signed-off-by: Al Viro Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/jump_label.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 368d158d665d..a1ed8a8c7cb4 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -11,6 +11,7 @@ #include #include +#ifdef HAVE_JUMP_LABEL void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { @@ -21,3 +22,4 @@ void arch_jump_label_transform(struct jump_entry *entry, else patch_instruction(addr, PPC_INST_NOP); } +#endif -- cgit v1.2.2 From bbc24a25e29136a56cf5015ef23eb2c2e8828023 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Mon, 14 Nov 2011 16:43:50 +0000 Subject: powerpc/4xx: Fix typos in kexec config dependencies Kexec is not supported on 47x. 47x is a variant of 44x with slightly different MMU and SMP support. There was a typo in the config dependency for kexec. This patch fixes the same. Signed-off-by: Suzuki K. Poulose Signed-off-by: Paul Bolle Cc: Kumar Gala Cc: Josh Boyer Cc: linux ppc dev Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/misc_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index f7d760ab5ca1..7cd07b42ca1a 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -738,7 +738,7 @@ relocate_new_kernel: mr r5, r31 li r0, 0 -#elif defined(CONFIG_44x) && !defined(CONFIG_47x) +#elif defined(CONFIG_44x) && !defined(CONFIG_PPC_47x) /* * Code for setting up 1:1 mapping for PPC440x for KEXEC -- cgit v1.2.2 From 6d1e2c6c1a0b800473db4df8595c95745be548ea Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 14 Nov 2011 12:55:47 +0000 Subject: powerpc: panic if we can't instantiate RTAS I had to debug a strange situation where all manner of things were failing. SMT threads, storage and network were all completely broken. The root cause was we couldn't find enough memory to instantiate RTAS - this was a network install so the initrd was huge. Instead of limping along and failing in mysterious ways we should just panic up front if RTAS exists and we can't allocate space for it. 
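To make the change concrete before the diff below: alloc_down() and prom_panic() are existing prom_init.c helpers, and prom_panic() does not return, so the allocate-or-panic pattern stops the boot immediately instead of letting it continue without RTAS. A minimal sketch of the resulting logic:

	base = alloc_down(size, PAGE_SIZE, 0);
	if (base == 0)
		prom_panic("Could not allocate memory for RTAS\n");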
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index b4fa66127495..cc584865b3df 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1579,10 +1579,8 @@ static void __init prom_instantiate_rtas(void) return; base = alloc_down(size, PAGE_SIZE, 0); - if (base == 0) { - prom_printf("RTAS allocation failed !\n"); - return; - } + if (base == 0) + prom_panic("Could not allocate memory for RTAS\n"); rtas_inst = call_prom("open", 1, 1, ADDR("/rtas")); if (!IHANDLE_VALID(rtas_inst)) { -- cgit v1.2.2 From d715e433b7ad19c02fc4becf0d5e9a59f97925de Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 14 Nov 2011 12:54:47 +0000 Subject: powerpc: Copy down exception vectors after feature fixups kdump fails because we try to execute an HV only instruction. Feature fixups are being applied after we copy the exception vectors down to 0 so they miss out on any updates. We have always had this issue but it only became critical in v3.0 when we added CFAR support (breaks POWER5) and v3.1 when we added POWERNV (breaks everyone). Signed-off-by: Anton Blanchard Cc: [v3.0+] Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/kvm.c | 1 - arch/powerpc/kernel/setup_32.c | 2 ++ arch/powerpc/kernel/setup_64.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 35f27646c4ff..2985338d0e10 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -132,7 +132,6 @@ static void kvm_patch_ins_b(u32 *inst, int addr) /* On relocatable kernels interrupts handlers and our code can be in different regions, so we don't patch them */ - extern u32 __end_interrupts; if ((ulong)inst < (ulong)&__end_interrupts) return; #endif diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index c1ce86357ecb..ac7610815113 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -107,6 +107,8 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) PTRRELOC(&__start___lwsync_fixup), PTRRELOC(&__stop___lwsync_fixup)); + do_final_fixups(); + return KERNELBASE + offset; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1a9dea80a69b..fb9bb46e7e88 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -359,6 +359,7 @@ void __init setup_system(void) &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); do_lwsync_fixups(cur_cpu_spec->cpu_features, &__start___lwsync_fixup, &__stop___lwsync_fixup); + do_final_fixups(); /* * Unflatten the device-tree passed by prom_init or kexec -- cgit v1.2.2 From 2cd76629f6c40f7b227ba7b3885ce10e6ec0face Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 10 Nov 2011 16:04:17 +0000 Subject: powerpc/trace: Add a dummy stack frame for trace_hardirqs_off The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. If an exception occurs in user mode, there is only one stack frame on the stack and accessing CALLER_ADDR1 will cause the following call trace. So we create a dummy stack frame to make trace_hardirqs_off happy.
WARNING: at kernel/smp.c:459 Modules linked in: NIP: c0093280 LR: c00930a0 CTR: c0010780 REGS: edb87ae0 TRAP: 0700 Not tainted (3.1.0) MSR: 00021002 CR: 28002888 XER: 00000000 TASK = edce2ac0[17658] 'mthread-lock-on' THREAD: edb86000 CPU: 5 GPR00: 00000001 edb87b90 edce2ac0 00000005 c0019594 edb87bd8 00000001 00000fe3 GPR08: 00041000 c084138c 4e20120d edb87b90 48002888 1001aa7c 00000000 00000000 GPR16: 48830000 10012a8c 00000000 10000af4 00000001 c0810000 00000000 00000000 GPR24: ee9aa920 c0816a18 00000000 00000005 c0019594 edb87bd8 ee20178c edb87b90 NIP [c0093280] smp_call_function_many+0x214/0x2b4 LR [c00930a0] smp_call_function_many+0x34/0x2b4 Call Trace: [edb87b90] [c00930a0] smp_call_function_many+0x34/0x2b4 (unreliable) [edb87bd0] [c00194ec] __flush_tlb_page+0xac/0x100 [edb87c00] [c001957c] flush_tlb_page+0x3c/0x54 [edb87c10] [c00180ac] ptep_set_access_flags+0x74/0x12c [edb87c40] [c0128068] handle_pte_fault+0x2f0/0x9ac [edb87cb0] [c0128c3c] handle_mm_fault+0x104/0x1dc [edb87ce0] [c05f40f4] do_page_fault+0x2dc/0x630 [edb87e50] [c001078c] handle_page_fault+0xc/0x80 Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/entry_32.S | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 56212bc0ab08..4f80cf1ce77b 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -215,7 +215,22 @@ reenable_mmu: /* re-enable mmu so we can */ stw r9,8(r1) stw r11,12(r1) stw r3,ORIG_GPR3(r1) + /* + * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. + * If from user mode there is only one stack frame on the stack, and + * accessing CALLER_ADDR1 will cause an oops. So we need to create a dummy + * stack frame to make trace_hardirqs_off happy. + */ + andi. r12,r12,MSR_PR + beq 11f + stwu r1,-16(r1) + bl trace_hardirqs_off + addi r1,r1,16 + b 12f + +11: bl trace_hardirqs_off +12: lwz r0,GPR0(r1) lwz r3,ORIG_GPR3(r1) lwz r4,GPR4(r1) -- cgit v1.2.2 From ba28c9aae26ef7f3651eef6835fae30a979f88ba Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 6 Oct 2011 02:53:38 +0000 Subject: powerpc: Revert show_regs() define for readability We had an existing ifdef for 4xx & BOOKE processors that got changed to CONFIG_PPC_ADV_DEBUG_REGS. The define has nothing to do with CONFIG_PPC_ADV_DEBUG_REGS.
The define really should be: #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) and not #ifdef CONFIG_PPC_ADV_DEBUG_REGS Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9054ca9ab4f9..ad3612af0a04 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -657,7 +657,7 @@ void show_regs(struct pt_regs * regs) if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) printk("CFAR: "REG"\n", regs->orig_gpr3); if (trap == 0x300 || trap == 0x600) -#ifdef CONFIG_PPC_ADV_DEBUG_REGS +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); #else printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); -- cgit v1.2.2 From b95bc2191412f5ecf2781c966110a13fa82a80d3 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 6 Oct 2011 02:53:39 +0000 Subject: powerpc: Remove extraneous CONFIG_PPC_ADV_DEBUG_REGS define All of DebugException is already protected by CONFIG_PPC_ADV_DEBUG_REGS; there is no need to have another such ifdef inside the function. Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/traps.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 4e5908264d1a..5459d148a0f6 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1298,14 +1298,12 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) if (user_mode(regs)) { current->thread.dbcr0 &= ~DBCR0_IC; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1)) regs->msr |= MSR_DE; else /* Make sure the IDM bit is off */ current->thread.dbcr0 &= ~DBCR0_IDM; -#endif } _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); -- cgit v1.2.2 From 187b9f2aa769198daa7cf8054abb65a08b8d8b47 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 6 Oct 2011 02:53:40 +0000 Subject: powerpc/book3e-64: Fix debug support for userspace With the introduction of CONFIG_PPC_ADV_DEBUG_REGS user space debug is broken on Book-E 64-bit parts that support delayed debug events. When switch_booke_debug_regs() sets DBCR0 we'll start getting debug events as MSR_DE is also set and we aren't able to handle debug events from kernel space. We can remove the hack that always enables MSR_DE and loads up DBCR0 and just utilize switch_booke_debug_regs() to get user space debug working again. We still need to handle critical/debug exception stacks & proper save/restore of state for those exception levels to support debug events from kernel space like we have on 32-bit. Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ad3612af0a04..6457574c0b2f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -486,28 +486,6 @@ struct task_struct *__switch_to(struct task_struct *prev, new_thread = &new->thread; old_thread = &current->thread; -#if defined(CONFIG_PPC_BOOK3E_64) - /* XXX Current Book3E code doesn't deal with kernel side DBCR0, * we always hold the user values, so we set it now.
- * - * However, we ensure the kernel MSR:DE is appropriately cleared too - * to avoid spurrious single step exceptions in the kernel. - * - * This will have to change to merge with the ppc32 code at some point, - * but I don't like much what ppc32 is doing today so there's some - * thinking needed there - */ - if ((new_thread->dbcr0 | old_thread->dbcr0) & DBCR0_IDM) { - u32 dbcr0; - - mtmsr(mfmsr() & ~MSR_DE); - isync(); - dbcr0 = mfspr(SPRN_DBCR0); - dbcr0 = (dbcr0 & DBCR0_EDM) | new_thread->dbcr0; - mtspr(SPRN_DBCR0, dbcr0); - } -#endif /* CONFIG_PPC64_BOOK3E */ - #ifdef CONFIG_PPC64 /* * Collect processor utilization data per process -- cgit v1.2.2 From a313f4c55d4952f2105fe33a4957ed858e998359 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Nov 2011 04:51:19 +0000 Subject: powerpc/signal32: Fix sigset_t conversion when copying to user On PPC64, put_sigset_t converts a sigset_t to a compat_sigset_t before copying it to userspace. There is a typo in the case that we have 4 words to copy, meaning that we corrupt the compat_sigset_t. It appears that _NSIG_WORDS can't be greater than 2 at the moment so this code is probably always optimised away anyway. Signed-off-by: Will Deacon Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/signal_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 78b76dc54dfb..836a5a19eb2c 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -97,7 +97,7 @@ static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) compat_sigset_t cset; switch (_NSIG_WORDS) { - case 4: cset.sig[5] = set->sig[3] & 0xffffffffull; + case 4: cset.sig[6] = set->sig[3] & 0xffffffffull; cset.sig[7] = set->sig[3] >> 32; case 3: cset.sig[4] = set->sig[2] & 0xffffffffull; cset.sig[5] = set->sig[2] >> 32; -- cgit v1.2.2 From 05737c7c5bca9a4f3e0f8bb9476445971b64fafd Mon Sep 17 00:00:00 2001 From: Jason Jin Date: Fri, 28 Oct 2011 16:08:00 +0800 Subject: powerpc/fsl-pci: Don't hide resource for pci/e when configured as Agent/EP The current pci/pcie init code will hide the pci/pcie host resource, but it does not check whether the controller is host/RC or agent/EP. If configured as agent/EP, we should avoid hiding its resource on the host side. In a PCI system, the Programming Interface can be used to judge the host/agent status: Programming Interface = 0: host Programming Interface = 1: Agent In a PCIe system, both the Programming Interface and Header Type can be used to judge the RC/EP status.
Header Type = 0: EP Header Type = 1: RC Signed-off-by: Jason Jin Signed-off-by: Mingkai Hu Signed-off-by: Jia Hongtao Signed-off-by: Li Yang Signed-off-by: Kumar Gala --- arch/powerpc/kernel/pci-common.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 458ed3bee663..069aa75e7161 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1747,10 +1747,13 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) static void fixup_hide_host_resource_fsl(struct pci_dev *dev) { int i, class = dev->class >> 8; + /* When configured as agent, programming interface = 1 */ + int prog_if = dev->class & 0xf; if ((class == PCI_CLASS_PROCESSOR_POWERPC || class == PCI_CLASS_BRIDGE_OTHER) && (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) && + (prog_if == 0) && (dev->bus->parent == NULL)) { for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { dev->resource[i].start = 0; -- cgit v1.2.2 From 37fb9a0231ee43d42d069863bdfd567fca2b61af Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:17 +0000 Subject: powerpc/time: Handle wrapping of decrementer When re-enabling interrupts we have code to handle edge-sensitive decrementers by resetting the decrementer to 1 whenever it is negative. If interrupts were disabled long enough that the decrementer wrapped to positive we do nothing. This means interrupts can be delayed for a long time until it finally goes negative again. While we hope interrupts are never disabled long enough for the decrementer to go positive, we have a very good test team that can drive any kernel into the ground. The softlockup data we get back from these failures could be seconds in the future, completely missing the cause of the lockup. We already keep track of the timebase of the next event so use that to work out if we should trigger a decrementer exception. Signed-off-by: Anton Blanchard Cc: stable@kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 15 ++++++--------- arch/powerpc/kernel/time.c | 9 +++++++++ 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5c3c46948d94..745c1e7c10fd 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -164,16 +164,13 @@ notrace void arch_local_irq_restore(unsigned long en) */ local_paca->hard_enabled = en; -#ifndef CONFIG_BOOKE - /* On server, re-trigger the decrementer if it went negative since - * some processors only trigger on edge transitions of the sign bit. - * - * BookE has a level sensitive decrementer (latches in TSR) so we - * don't need that + /* + * Trigger the decrementer if we have a pending event. Some processors + * only trigger on edge transitions of the sign bit. We might also + * have disabled interrupts long enough that the decrementer wrapped + * to positive. */ - if ((int)mfspr(SPRN_DEC) < 0) - mtspr(SPRN_DEC, 1); -#endif /* CONFIG_BOOKE */ + decrementer_check_overflow(); /* * Force the delivery of pending soft-disabled interrupts on PS3.
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 522bb1dfc353..5db163c96751 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -889,6 +889,15 @@ static void __init clocksource_init(void) clock->name, clock->mult, clock->shift); } +void decrementer_check_overflow(void) +{ + u64 now = get_tb_or_rtc(); + struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); + + if (now >= decrementer->next_tb) + set_dec(1); +} + static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { -- cgit v1.2.2 From d8afc6fd95496204174f19af0cb39eefee0c3e8a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:18 +0000 Subject: powerpc/time: Use clockevents_calc_mult_shift We can use clockevents_calc_mult_shift instead of doing all the work ourselves. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 5db163c96751..fae3094c2a9a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -112,8 +112,6 @@ static void decrementer_set_mode(enum clock_event_mode mode, static struct clock_event_device decrementer_clockevent = { .name = "decrementer", .rating = 200, - .shift = 0, /* To be filled in */ - .mult = 0, /* To be filled in */ .irq = 0, .set_next_event = decrementer_set_next_event, .set_mode = decrementer_set_mode, @@ -913,31 +911,6 @@ static void decrementer_set_mode(enum clock_event_mode mode, decrementer_set_next_event(DECREMENTER_MAX, dev); } -static inline uint64_t div_sc64(unsigned long ticks, unsigned long nsec, - int shift) -{ - uint64_t tmp = ((uint64_t)ticks) << shift; - - do_div(tmp, nsec); - return tmp; -} - -static void __init setup_clockevent_multiplier(unsigned long hz) -{ - u64 mult, shift = 32; - - while (1) { - mult = div_sc64(hz, NSEC_PER_SEC, shift); - if (mult && (mult >> 32UL) == 0UL) - break; - - shift--; - } - - decrementer_clockevent.shift = shift; - decrementer_clockevent.mult = mult; -} - static void register_decrementer_clockevent(int cpu) { struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; @@ -955,7 +928,8 @@ static void __init init_decrementer_clockevent(void) { int cpu = smp_processor_id(); - setup_clockevent_multiplier(ppc_tb_freq); + clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4); + decrementer_clockevent.max_delta_ns = clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); decrementer_clockevent.min_delta_ns = -- cgit v1.2.2 From 11b8633ada8633991e584951d0027f2741162201 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:19 +0000 Subject: powerpc/time: Use clocksource_register_hz Use clocksource_register_hz which calculates the shift/mult factors for us. Also remove the shift = 22 assumption in update_vsyscall - thanks to Paul Mackerras and John Stultz for catching that.
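As background for the two conversions above and below: both clockevents_calc_mult_shift() and clocksource_register_hz() boil down to choosing a (mult, shift) pair such that ns = (ticks * mult) >> shift, with the largest shift for which mult still fits in 32 bits. A standalone sketch of that idea follows; the 512 MHz timebase frequency is only an assumed example value, and the real kernel helpers additionally clamp the conversion range, which this sketch ignores:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t hz = 512000000;	/* assumed timebase frequency */
		uint64_t ticks = 1000;		/* ticks to convert */
		uint32_t shift, mult = 0;

		/* Pick the largest shift for which mult still fits in 32 bits */
		for (shift = 32; shift > 0; shift--) {
			uint64_t m = ((uint64_t)1000000000 << shift) / hz;
			if (m <= 0xffffffffULL) {
				mult = (uint32_t)m;
				break;
			}
		}

		/* ns = (ticks * mult) >> shift */
		printf("shift=%u mult=%u: %llu ticks -> %llu ns\n", shift, mult,
		       (unsigned long long)ticks,
		       (unsigned long long)((ticks * mult) >> shift));
		return 0;
	}

For 512 MHz this prints shift=31 mult=4194304000, and 1000 ticks -> 1953 ns (the exact value is 1953.125 ns), which also shows the rounding inherent in the fixed-point conversion.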
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fae3094c2a9a..d204b726a185 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -86,8 +86,6 @@ static struct clocksource clocksource_rtc = { .rating = 400, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, - .mult = 0, /* To be filled in */ .read = rtc_read, }; @@ -97,8 +95,6 @@ static struct clocksource clocksource_timebase = { .rating = 400, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, - .mult = 0, /* To be filled in */ .read = timebase_read, }; @@ -822,9 +818,8 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, ++vdso_data->tb_update_count; smp_mb(); - /* XXX this assumes clock->shift == 22 */ - /* 4611686018 ~= 2^(20+64-22) / 1e9 */ - new_tb_to_xs = (u64) mult * 4611686018ULL; + /* 19342813113834067 ~= 2^(20+64) / 1e9 */ + new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift); new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC; do_div(new_stamp_xsec, 1000000000); new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; @@ -875,9 +870,7 @@ static void __init clocksource_init(void) else clock = &clocksource_timebase; - clock->mult = clocksource_hz2mult(tb_ticks_per_sec, clock->shift); - - if (clocksource_register(clock)) { + if (clocksource_register_hz(clock, tb_ticks_per_sec)) { printk(KERN_ERR "clocksource: %s is already registered\n", clock->name); return; -- cgit v1.2.2 From 68568add2ca70153cca3dd1858eaa0776821cf75 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:20 +0000 Subject: powerpc/time: Remove unnecessary sanity check of decrementer expiration The clockevents code uses max_delta_ns to avoid calling a clockevent with too large a value. Remove the redundant version of this in the timer_interrupt code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index d204b726a185..2eaaa242c2e6 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -572,7 +572,6 @@ void timer_interrupt(struct pt_regs * regs) struct pt_regs *old_regs; struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); struct clock_event_device *evt = &decrementer->event; - u64 now; /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. 
@@ -607,16 +606,9 @@ void timer_interrupt(struct pt_regs * regs) get_lppaca()->int_dword.fields.decr_int = 0; #endif - now = get_tb_or_rtc(); - if (now >= decrementer->next_tb) { - decrementer->next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); - } else { - now = decrementer->next_tb - now; - if (now <= DECREMENTER_MAX) - set_dec((int)now); - } + decrementer->next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending()) -- cgit v1.2.2 From 621692cb7efb6d0e38c62e41844a6360c6719b20 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:21 +0000 Subject: powerpc/time: Fix some style issues Fix some formatting issues and use the DECREMENTER_MAX define instead of 0x7fffffff. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 2eaaa242c2e6..b1990b987e2c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -106,12 +106,12 @@ static void decrementer_set_mode(enum clock_event_mode mode, struct clock_event_device *dev); static struct clock_event_device decrementer_clockevent = { - .name = "decrementer", - .rating = 200, - .irq = 0, - .set_next_event = decrementer_set_next_event, - .set_mode = decrementer_set_mode, - .features = CLOCK_EVT_FEAT_ONESHOT, + .name = "decrementer", + .rating = 200, + .irq = 0, + .set_next_event = decrementer_set_next_event, + .set_mode = decrementer_set_mode, + .features = CLOCK_EVT_FEAT_ONESHOT, }; struct decrementer_clock { @@ -435,7 +435,7 @@ EXPORT_SYMBOL(profile_pc); /* * This function recalibrates the timebase based on the 49-bit time-of-day * value in the Titan chip. The Titan is much more accurate than the value - * returned by the service processor for the timebase frequency. + * returned by the service processor for the timebase frequency. */ static int __init iSeries_tb_recal(void) @@ -636,9 +636,9 @@ static void generic_suspend_disable_irqs(void) * with suspending. */ - set_dec(0x7fffffff); + set_dec(DECREMENTER_MAX); local_irq_disable(); - set_dec(0x7fffffff); + set_dec(DECREMENTER_MAX); } static void generic_suspend_enable_irqs(void) @@ -982,10 +982,10 @@ void __init time_init(void) boot_tb = get_tb_or_rtc(); /* If platform provided a timezone (pmac), we correct the time */ - if (timezone_offset) { + if (timezone_offset) { sys_tz.tz_minuteswest = -timezone_offset / 60; sys_tz.tz_dsttime = 0; - } + } vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; -- cgit v1.2.2 From 7df1027542c9353bef4d027cb4ab8e99f69017b7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:22 +0000 Subject: powerpc/time: Optimise decrementer_check_overflow decrementer_check_overflow is called from arch_local_irq_restore so we want to make it as light weight as possible. As such, turn decrementer_check_overflow into an inline function. To avoid a circular mess of includes, separate out the two components of struct decrementer_clock and keep the struct clock_event_device part local to time.c. The fast path improves from: arch_local_irq_restore 0: mflr r0 4: std r0,16(r1) 8: stdu r1,-112(r1) c: stb r3,578(r13) 10: cmpdi cr7,r3,0 14: beq- cr7,24 <.arch_local_irq_restore+0x24> ... 
24: addi r1,r1,112 28: ld r0,16(r1) 2c: mtlr r0 30: blr to: arch_local_irq_restore 0: std r30,-16(r1) 4: ld r30,0(r2) 8: stb r3,578(r13) c: cmpdi cr7,r3,0 10: beq- cr7,6c <.arch_local_irq_restore+0x6c> ... 6c: ld r30,-16(r1) 70: blr Unfortunately we still setup a local TOC (due to -mminimal-toc). Yet another sign we should be moving to -mcmodel=medium. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 9 +++++++++ arch/powerpc/kernel/time.c | 27 +++++++-------------------- 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 745c1e7c10fd..2ff4f5e59620 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -115,6 +115,15 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } +static inline notrace void decrementer_check_overflow(void) +{ + u64 now = get_tb_or_rtc(); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + + if (now >= *next_tb) + set_dec(1); +} + notrace void arch_local_irq_restore(unsigned long en) { /* diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index b1990b987e2c..9754743db8b9 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -114,12 +114,8 @@ static struct clock_event_device decrementer_clockevent = { .features = CLOCK_EVT_FEAT_ONESHOT, }; -struct decrementer_clock { - struct clock_event_device event; - u64 next_tb; -}; - -static DEFINE_PER_CPU(struct decrementer_clock, decrementers); +DEFINE_PER_CPU(u64, decrementers_next_tb); +static DEFINE_PER_CPU(struct clock_event_device, decrementers); #ifdef CONFIG_PPC_ISERIES static unsigned long __initdata iSeries_recal_titan; @@ -570,8 +566,8 @@ void arch_irq_work_raise(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); - struct clock_event_device *evt = &decrementer->event; + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + struct clock_event_device *evt = &__get_cpu_var(decrementers); /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. 
@@ -606,7 +602,7 @@ void timer_interrupt(struct pt_regs * regs) get_lppaca()->int_dword.fields.decr_int = 0; #endif - decrementer->next_tb = ~(u64)0; + *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); @@ -872,19 +868,10 @@ static void __init clocksource_init(void) clock->name, clock->mult, clock->shift); } -void decrementer_check_overflow(void) -{ - u64 now = get_tb_or_rtc(); - struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); - - if (now >= decrementer->next_tb) - set_dec(1); -} - static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __get_cpu_var(decrementers).next_tb = get_tb_or_rtc() + evt; + __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; set_dec(evt); return 0; } @@ -898,7 +885,7 @@ static void decrementer_set_mode(enum clock_event_mode mode, static void register_decrementer_clockevent(int cpu) { - struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; + struct clock_event_device *dec = &per_cpu(decrementers, cpu); *dec = decrementer_clockevent; dec->cpumask = cpumask_of(cpu); -- cgit v1.2.2 From fac26ad4f9cb794c9d1032f55f40a31cb55be09a Mon Sep 17 00:00:00 2001 From: Jimi Xenidis Date: Thu, 29 Sep 2011 10:55:13 +0000 Subject: powerpc/book3e: Add ICSWX/ACOP support to Book3e cores like A2 ICSWX is also used by the A2 processor to access coprocessors, although not all "chips" that contain A2s have coprocessors. Signed-off-by: Jimi Xenidis Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cpu_setup_a2.S | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S index 7f818feaa7a5..ebc62f42a237 100644 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -41,11 +41,16 @@ _GLOBAL(__setup_cpu_a2) * core local but doing it always won't hurt */ -#ifdef CONFIG_PPC_WSP_COPRO +#ifdef CONFIG_PPC_ICSWX /* Make sure ACOP starts out as zero */ li r3,0 mtspr SPRN_ACOP,r3 + /* Skip the following if we are in Guest mode */ + mfmsr r3 + andis. r0,r3,MSR_GS@h + bne _icswx_skip_guest + /* Enable icswx instruction */ mfspr r3,SPRN_A2_CCR2 ori r3,r3,A2_CCR2_ENABLE_ICSWX @@ -54,7 +59,8 @@ _GLOBAL(__setup_cpu_a2) /* Unmask all CTs in HACOP */ li r3,-1 mtspr SPRN_HACOP,r3 -#endif /* CONFIG_PPC_WSP_COPRO */ +_icswx_skip_guest: +#endif /* CONFIG_PPC_ICSWX */ /* Enable doorbell */ mfspr r3,SPRN_A2_CCR2 -- cgit v1.2.2 From df17f56d8a1a3a533b6b3e3a49a624626a49b197 Mon Sep 17 00:00:00 2001 From: "Ravi K. Nittala" Date: Mon, 3 Oct 2011 21:49:53 +0000 Subject: powerpc/pseries: Cancel RTAS event scan before firmware flash The RTAS firmware flash update is conducted using an RTAS call that is serialized by lock_rtas(), which uses a spin_lock. While the flash is in progress, rtasd keeps scanning for RTAS events generated by the system. This is performed via a workqueue mechanism. The rtas_event_scan() also uses an RTAS call to scan the events, eventually trying to acquire the spin_lock before issuing the request. The flash update takes a while to complete, and during this time any other RTAS call has to wait. In this case, rtas_event_scan() waits for a long time on the spin_lock resulting in a soft lockup.
Fix: Just before the flash update is performed, the queued rtas_event_scan() work item is cancelled from the work queue so that there is no other RTAS call issued while the flash is in progress. After the flash completes, the system reboots and the rtas_event_scan() is rescheduled. Signed-off-by: Suzuki Poulose Signed-off-by: Ravi Nittala Reported-by: Divya Vikas Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtas_flash.c | 6 ++++++ arch/powerpc/kernel/rtasd.c | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index e037c7494fd8..4174b4b23246 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -567,6 +567,12 @@ static void rtas_flash_firmware(int reboot_type) return; } + /* + * Just before starting the firmware flash, cancel the event scan work + * to avoid any soft lockup issues. + */ + rtas_cancel_event_scan(); + /* * NOTE: the "first" block must be under 4GB, so we create * an entry with no data blocks in the reserved buffer in diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 481ef064c8f1..1045ff49cc6d 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -472,6 +472,13 @@ static void start_event_scan(void) &event_scan_work, event_scan_delay); } +/* Cancel the rtas event scan work */ +void rtas_cancel_event_scan(void) +{ + cancel_delayed_work_sync(&event_scan_work); +} +EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); + static int __init rtas_init(void) { struct proc_dir_entry *entry; -- cgit v1.2.2 From 3b5e16d7ad0cf4b03a0650092a6fdcb27b536a82 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 Oct 2011 02:30:50 +0000 Subject: powerpc: Mark IPI interrupts IRQF_NO_THREAD IPI handlers cannot be threaded. Remove the obsolete IRQF_DISABLED flag (see commit e58aa3d2) while at it. Signed-off-by: Thomas Gleixner Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6df70907d60a..f0abe92f63f2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -187,7 +187,8 @@ int smp_request_message_ipi(int virq, int msg) return 1; } #endif - err = request_irq(virq, smp_ipi_action[msg], IRQF_PERCPU, + err = request_irq(virq, smp_ipi_action[msg], + IRQF_PERCPU | IRQF_NO_THREAD, smp_ipi_name[msg], 0); WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n", virq, smp_ipi_name[msg], err); -- cgit v1.2.2 From 491b98c315dbe39b20bd4a24a6179c42349f42c0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 6 Nov 2011 18:55:57 +0000 Subject: powerpc/pci: Add a platform hook after probe and before resource survey Some platforms need to perform resource allocation using a custom algorithm due to HW constraints, or may want to tweak things globally below a host bridge. For example OPAL support for IODA will need to perform a resource allocation pass that applies IODA specific segmentation constraints to MMIO which cannot be done simply using the kernel generic resource management code. 
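To illustrate how a platform might consume the new hook described above, here is a hypothetical sketch; ppc_md.pcibios_fixup_phb and its call site in pcibios_scan_phb() come from the patch below, while the platform name and the fixup body are invented for illustration:

	/* Hypothetical platform code; only the .pcibios_fixup_phb hook is from the patch */
	static void __init example_pcibios_fixup_phb(struct pci_controller *hose)
	{
		/* Called from pcibios_scan_phb() after the child bus scan and
		 * before the generic resource survey, so platform-specific
		 * constraints (e.g. IODA segmentation) can be applied to this
		 * PHB here.
		 */
	}

	define_machine(example) {
		.name			= "example",
		.pcibios_fixup_phb	= example_pcibios_fixup_phb,
		/* ... other machdep callbacks ... */
	};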
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 458ed3bee663..f5b753de3718 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1732,6 +1732,12 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) if (mode == PCI_PROBE_NORMAL) hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); + /* Platform gets a chance to do some global fixups before + * we proceed to resource allocation + */ + if (ppc_md.pcibios_fixup_phb) + ppc_md.pcibios_fixup_phb(hose); + /* Configure PCI Express settings */ if (bus && !pci_has_flag(PCI_PROBE_ONLY)) { struct pci_bus *child; -- cgit v1.2.2 From 48c2ce97fa406607ca5e11a76cf507d171452dd9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 6 Nov 2011 18:55:58 +0000 Subject: powerpc/pci: Change how re-assigning resources works When PCI_REASSIGN_ALL_RSRC is set, we used to clear all bus resources at the beginning of survey and re-allocate them later. This changes it so instead, during early fixup, we mark all resources as IORESOURCE_UNSET and move them down to be 0-based. Later, if bus resources are still unset at the beginning of the survey, then we clear them. This shouldn't impact the re-assignment case on 4xx, but will enable us to have the platform do some custom resource assignment before the survey, by clearing individual resources' IORESOURCE_UNSET bit. Also limits the clutter in the kernel log from fixup when re-assigning since we don't care about the offset applied to the BAR values in this case. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 66 ++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 30 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index f5b753de3718..8f428fff545f 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -921,18 +921,22 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev) struct resource *res = dev->resource + i; if (!res->flags) continue; + + /* If we're going to re-assign everything, we mark all resources + * as unset (and 0-base them).
In addition, we mark BARs starting + * at 0 as unset as well, except if PCI_PROBE_ONLY is also set + * since in that case, we don't want to re-assign anything */ - if (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY)) { - pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] is unassigned\n", - pci_name(dev), i, - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned int)res->flags); + if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) || + (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { + /* Only print message if not re-assigning */ + if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) + pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] " + "is unassigned\n", + pci_name(dev), i, + (unsigned long long)res->start, + (unsigned long long)res->end, + (unsigned int)res->flags); res->end -= res->start; res->start = 0; res->flags |= IORESOURCE_UNSET; @@ -1042,6 +1046,16 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus) if (i >= 3 && bus->self->transparent) continue; + /* If we are going to re-assign everything, mark the resource + * as unset and move it down to 0 + */ + if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { + res->flags |= IORESOURCE_UNSET; + res->end -= res->start; + res->start = 0; + continue; + } + pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n", pci_name(dev), i, (unsigned long long)res->start,\ @@ -1262,18 +1276,15 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) pci_bus_for_each_resource(bus, res, i) { if (!res || !res->flags || res->start > res->end || res->parent) continue; + + /* If the resource was left unset at this point, we clear it */ + if (res->flags & IORESOURCE_UNSET) + goto clear_resource; + if (bus->parent == NULL) pr = (res->flags & IORESOURCE_IO) ? &ioport_resource : &iomem_resource; else { - /* Don't bother with non-root busses when - * re-assigning all resources. We clear the - * resource flags as if they were colliding - * and as such ensure proper re-allocation - * later. - */ - if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) - goto clear_resource; pr = pci_find_parent_resource(bus->self, res); if (pr == res) { /* this happens when the generic PCI @@ -1304,9 +1315,9 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) if (reparent_resources(pr, res) == 0) continue; } - printk(KERN_WARNING "PCI: Cannot allocate resource region " - "%d of PCI bridge %d, will remap\n", i, bus->number); -clear_resource: + pr_warning("PCI: Cannot allocate resource region " + "%d of PCI bridge %d, will remap\n", i, bus->number); + clear_resource: res->start = res->end = 0; res->flags = 0; } @@ -1451,16 +1462,11 @@ void __init pcibios_resource_survey(void) { struct pci_bus *b; - /* Allocate and assign resources. If we re-assign everything, then - * we skip the allocate phase - */ + /* Allocate and assign resources */ list_for_each_entry(b, &pci_root_buses, node) pcibios_allocate_bus_resources(b); - - if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); - } + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); /* Before we start assigning unassigned resource, we try to reserve * the low IO area and the VGA memory area if they intersect the -- cgit v1.2.2 From 184cd4a3b962a4769889615430eaf40076b97969 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 15 Nov 2011 17:29:08 +0000 Subject: powerpc/powernv: PCI support for p7IOC under OPAL v2 This adds support for p7IOC (and possibly other IODA v1 IO Hubs) using OPAL v2 interfaces. 
We completely take over resource assignment and assign them using an algorithm that hands out device BARs in a way that makes them fit in individual segments of the M32 window of the bridge, which enables us to assign individual PEs to devices and functions. The current implementation gives out a PE per function on PCIe, and a PE for the entire bridge for PCIe to PCI-X bridges. This can be adjusted / fine tuned later. We also set up DMA resources (32-bit only for now) and MSIs (both 32-bit and 64-bit MSI are supported). The DMA allocation tries to divide the available 256M segments of the 32-bit DMA address space "fairly" among PEs. This is done using a "weight" heuristic which assigns less value to things like OHCI USB controllers than, for example, SCSI RAID controllers. This algorithm will probably want some fine tuning for specific devices or device types. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci_dn.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 4e69deb89b37..dd9e4a04bf79 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -50,6 +50,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data) dn->data = pdn; pdn->node = dn; pdn->phb = phb; +#ifdef CONFIG_PPC_POWERNV + pdn->pe_number = IODA_INVALID_PE; +#endif regs = of_get_property(dn, "reg", NULL); if (regs) { /* First register entry is addr (00BBSS00) */ -- cgit v1.2.2 From 595fe91447b03cb72c97f45bc5db30fd73b66b38 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Thu, 10 Nov 2011 19:58:53 +0000 Subject: powerpc: Export PIR data through sysfs On Fri, Nov 11, 2011 at 10:17:55AM +0530, Ananth N Mavinakayanahalli wrote: > > > > At this rate we're going to end up with no bits left for CPU features > > way too quickly... Especially for something we only care about once at > > boot time. > > > > Wouldn't CPU_FTR_PPCAS_ARCH_V2 be a good enough test ? > > /me checks Cell manuals... yes, that test would be good enough. I will > cook up a patch to use this. Here it is...
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/sysfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ce035c1905f0..f579be552094 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -177,11 +177,13 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); SYSFS_PMCSETUP(purr, SPRN_PURR); SYSFS_PMCSETUP(spurr, SPRN_SPURR); SYSFS_PMCSETUP(dscr, SPRN_DSCR); +SYSFS_PMCSETUP(pir, SPRN_PIR); static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL); static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr); static SYSDEV_ATTR(purr, 0600, show_purr, store_purr); +static SYSDEV_ATTR(pir, 0400, show_pir, NULL); unsigned long dscr_default = 0; EXPORT_SYMBOL(dscr_default); @@ -392,6 +394,9 @@ static void __cpuinit register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_DSCR)) sysdev_create_file(s, &attr_dscr); + + if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) + sysdev_create_file(s, &attr_pir); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_online(cpu); @@ -462,6 +467,9 @@ static void unregister_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_DSCR)) sysdev_remove_file(s, &attr_dscr); + + if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) + sysdev_remove_file(s, &attr_pir); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_offline(cpu); -- cgit v1.2.2 From 3bfd0c9c8f9cd2c09cf3e5376c7113eec3370ebd Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 24 Nov 2011 19:35:57 +0000 Subject: powerpc: Decode correct MSR bits in oops output On a 64bit book3s machine I have an oops from a system reset that claims the book3e CE bit was set: MSR: 8000000000021032 CR: 24004082 XER: 00000010 On a book3s machine system reset sets IBM bit 46 and 47 depending on the power saving mode. Separate the definitions by type and for completeness add the rest of the bits in. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6457574c0b2f..ebe5766781aa 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -584,16 +584,32 @@ static struct regbit { unsigned long bit; const char *name; } msr_bits[] = { +#if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE) + {MSR_SF, "SF"}, + {MSR_HV, "HV"}, +#endif + {MSR_VEC, "VEC"}, + {MSR_VSX, "VSX"}, +#ifdef CONFIG_BOOKE + {MSR_CE, "CE"}, +#endif {MSR_EE, "EE"}, {MSR_PR, "PR"}, {MSR_FP, "FP"}, - {MSR_VEC, "VEC"}, - {MSR_VSX, "VSX"}, {MSR_ME, "ME"}, - {MSR_CE, "CE"}, +#ifdef CONFIG_BOOKE {MSR_DE, "DE"}, +#else + {MSR_SE, "SE"}, + {MSR_BE, "BE"}, +#endif {MSR_IR, "IR"}, {MSR_DR, "DR"}, + {MSR_PMM, "PMM"}, +#ifndef CONFIG_BOOKE + {MSR_RI, "RI"}, + {MSR_LE, "LE"}, +#endif {0, NULL} }; -- cgit v1.2.2 From d5b9ee7b514ee2f3df649fe38d01494ad7a8b956 Mon Sep 17 00:00:00 2001 From: Tanmay Inamdar Date: Mon, 28 Nov 2011 21:01:41 +0000 Subject: powerpc/40x: Add APM8018X SOC support The AppliedMicro APM8018X embedded processor targets embedded applications that require low power and a small footprint. It features a PowerPC 405 processor core built in a 65nm low-power CMOS process with a five-stage pipeline executing up to one instruction per cycle. The family has 128-kbytes of on-chip memory, a 128-bit local bus and on-chip DDR2 SDRAM controller with 16-bit interface. 
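For context on how the table entry below gets used: identify_cpu() walks cpu_specs[] and selects the first entry whose masked PVR matches, with the all-zero default entry matching anything. A small standalone sketch of that matching logic, using invented table values rather than the real ones:

	#include <stdio.h>
	#include <stdint.h>

	struct cpu_spec {
		uint32_t pvr_mask;
		uint32_t pvr_value;
		const char *cpu_name;
	};

	/* Invented entries for illustration; first match wins, as in cputable.c */
	static const struct cpu_spec specs[] = {
		{ 0xffff0000, 0x7ff10000, "example-405-variant" },
		{ 0x00000000, 0x00000000, "default" },
	};

	static const struct cpu_spec *identify(uint32_t pvr)
	{
		unsigned int i;

		for (i = 0; i < sizeof(specs) / sizeof(specs[0]); i++)
			if ((pvr & specs[i].pvr_mask) == specs[i].pvr_value)
				return &specs[i];
		return NULL;
	}

	int main(void)
	{
		/* 0x7ff11432 & 0xffff0000 == 0x7ff10000 -> first entry matches */
		printf("%s\n", identify(0x7ff11432)->cpu_name);
		return 0;
	}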
Signed-off-by: Tanmay Inamdar Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cputable.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index edae5bb06f1f..ce59693835c7 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1505,6 +1505,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_4xx, .platform = "ppc405", }, + { /* APM8018X */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff11432, + .cpu_name = "APM8018X", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, -- cgit v1.2.2 From a6146888be0aa80ea41c99178d7d2e08efc776b5 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Mon, 10 Oct 2011 10:50:43 +0000 Subject: powerpc: Add gpages reservation code for 64-bit FSL BOOKE For 64-bit FSL_BOOKE implementations, gigantic pages need to be reserved at boot time by the memblock code based on the command line. This adds the call that handles the reservation, and fixes some code comments. It also removes the previous pr_err when reserve_hugetlb_gpages is called on a system without hugetlb enabled - the way the code is structured, the call is unconditional and the resulting error message spurious and confusing. Signed-off-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fb9bb46e7e88..4cb8f1e9d044 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -35,6 +35,8 @@ #include #include #include +#include + #include #include #include @@ -64,6 +66,7 @@ #include #include #include +#include #include "setup.h" @@ -217,6 +220,13 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + /* + * Reserve any gigantic pages requested on the command line. + * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + DBG(" <- early_setup()\n"); } -- cgit v1.2.2 From 40dfef66a9e3d4a308c3ed7355c9a89e68c08ffc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 29 Nov 2011 18:22:56 +0000 Subject: powerpc/powernv: Workaround OFW issues in prom_init.c Open Firmware on OPAL machines seems to have issues if we close stdin and/or we try to print things after calling "quiesce" so we avoid doing both. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index cc584865b3df..f4f9f2f14322 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2969,9 +2969,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* * in case stdin is USB and still active on IBM machines... * Unfortunately quiesce crashes on some powermacs if we have - * closed stdin already (in particular the powerbook 101). 
+ * closed stdin already (in particular the powerbook 101). It + * appears that the OPAL version of OFW doesn't like it either. */ - if (RELOC(of_platform) != PLATFORM_POWERMAC) + if (RELOC(of_platform) != PLATFORM_POWERMAC && + RELOC(of_platform) != PLATFORM_OPAL) prom_close_stdin(); /* @@ -2987,8 +2989,12 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * is common to us and kexec */ hdr = RELOC(dt_header_start); - prom_printf("returning from prom_init\n"); - prom_debug("->dt_header_start=0x%x\n", hdr); + + /* Don't print anything after quiesce under OPAL, it crashes OFW */ + if (RELOC(of_platform) != PLATFORM_OPAL) { + prom_printf("returning from prom_init\n"); + prom_debug("->dt_header_start=0x%x\n", hdr); + } #ifdef CONFIG_PPC32 reloc_got2(-offset); -- cgit v1.2.2 From 4666ca2aa344105da0da3afda48d987c82261c05 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 29 Nov 2011 20:16:25 +0000 Subject: powerpc/pci: Make pci_read_irq_line() static It's only used inside the same file where it's defined. There's also no point exporting it anymore. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 9bffc028f45a..fa4a573d6716 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -214,7 +214,7 @@ char __devinit *pcibios_setup(char *str) * If the interrupt is used, then gets the interrupt line from the * openfirmware and sets it in the pci_dev and pci_config line. */ -int pci_read_irq_line(struct pci_dev *pci_dev) +static int pci_read_irq_line(struct pci_dev *pci_dev) { struct of_irq oirq; unsigned int virq; @@ -283,7 +283,6 @@ int pci_read_irq_line(struct pci_dev *pci_dev) return 0; } -EXPORT_SYMBOL(pci_read_irq_line); /* * Platform support for /proc/bus/pci/X/Y mmap()s, -- cgit v1.2.2 From 771dae81896855d25f7f8746aaf56c0238deafb6 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 30 Nov 2011 02:46:31 +0000 Subject: powerpc/cpuidle: Add cpu_idle_wait() to allow switching of idle routines This patch provides cpu_idle_wait() routine for the powerpc platform which is required by the cpuidle subsystem. This routine is required to change the idle handler on SMP systems. The equivalent routine for x86 is in arch/x86/kernel/process.c but the powerpc implementation is different. cpuidle_disable variable is to enable/disable cpuidle framework if power_save option is set during the boot time. Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Signed-off-by: Arun R Bharadwaj Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/idle.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 39a2baa6ad58..8574b0e81ff1 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -39,9 +39,13 @@ #define cpu_should_die() 0 #endif +unsigned long cpuidle_disable = IDLE_NO_OVERRIDE; +EXPORT_SYMBOL(cpuidle_disable); + static int __init powersave_off(char *arg) { ppc_md.power_save = NULL; + cpuidle_disable = IDLE_POWERSAVE_OFF; return 0; } __setup("powersave=off", powersave_off); @@ -102,6 +106,29 @@ void cpu_idle(void) } } + +/* + * cpu_idle_wait - Used to ensure that all the CPUs come out of the old + * idle loop and start using the new idle loop. 
+ * Required while changing idle handler on SMP systems. + * Caller must have changed idle handler to the new value before the call. + * This window may be larger on shared systems. + */ +void cpu_idle_wait(void) +{ + int cpu; + smp_mb(); + + /* kick all the CPUs so that they exit out of old idle routine */ + get_online_cpus(); + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + smp_send_reschedule(cpu); + } + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + int powersave_nap; #ifdef CONFIG_SYSCTL -- cgit v1.2.2 From 707827f3387d9b260d50fa697885a4042cea3bf4 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 30 Nov 2011 02:46:42 +0000 Subject: powerpc/cpuidle: cpuidle driver for pSeries This patch implements a back-end cpuidle driver for pSeries based on pseries_dedicated_idle_loop and pseries_shared_idle_loop routines. The driver is built only if CONFIG_CPU_IDLE is set. This cpuidle driver uses global registration of idle states and not per-cpu. Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Signed-off-by: Arun R Bharadwaj Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index f579be552094..6fdf5ffe8c44 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "cacheinfo.h" @@ -51,6 +52,7 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, return -EINVAL; per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; + update_smt_snooze_delay(snooze); return count; } -- cgit v1.2.2 From 58154c8ce71a7854d969d73468fd00e5eeeab708 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:09 +0000 Subject: powerpc: Give us time to get all oopses out before panicking I've been seeing truncated output when people send system reset info to me. We should see a backtrace for every CPU, but the panic() code takes the box down before they all make it out to the console. The panic code runs unlocked so we also see corrupted console output. If we are going to panic, then delay 1 second before calling into the panic code. Move oops_exit inside the die lock and put a newline between oopses for clarity. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/traps.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5459d148a0f6..91377870b26a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -158,6 +158,8 @@ int die(const char *str, struct pt_regs *regs, long err) bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); + oops_exit(); + printk("\n"); raw_spin_unlock_irqrestore(&die.lock, flags); if (kexec_should_crash(current) || @@ -165,13 +167,23 @@ int die(const char *str, struct pt_regs *regs, long err) crash_kexec(regs); crash_kexec_secondary(regs); + /* + * While our oops output is serialised by a spinlock, output + * from panic() called below can race and corrupt it. If we + * know we are going to panic, delay for 1 second so we have a + * chance to get clean backtraces from all CPUs that are oopsing. 
+ */ + if (in_interrupt() || panic_on_oops || !current->pid || + is_global_init(current)) { + mdelay(MSEC_PER_SEC); + } + if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - oops_exit(); do_exit(err); return 0; -- cgit v1.2.2 From 9b00ac06978c54788f13eefd34a07b77db48d567 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:10 +0000 Subject: powerpc: Remove broken and complicated kdump system reset code We have a lot of complicated logic that handles possible recursion between kdump and a system reset exception. We can solve this in a much simpler way using the same setjmp/longjmp tricks xmon does. As a first step, this patch removes the old system reset code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 87 +++++++-------------------------------------- arch/powerpc/kernel/traps.c | 33 +++++++---------- 2 files changed, 25 insertions(+), 95 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index d879809d5c45..3d87b205d5f5 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -47,7 +47,6 @@ /* This keeps a track of which one is crashing cpu. */ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; -cpumask_t cpus_in_sr = CPU_MASK_NONE; #define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ @@ -55,7 +54,6 @@ static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); #ifdef CONFIG_SMP -static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); void crash_ipi_callback(struct pt_regs *regs) { @@ -69,24 +67,9 @@ void crash_ipi_callback(struct pt_regs *regs) crash_save_cpu(regs, cpu); cpumask_set_cpu(cpu, &cpus_in_crash); - /* - * Entered via soft-reset - could be the kdump - * process is invoked using soft-reset or user activated - * it if some CPU did not respond to an IPI. - * For soft-reset, the secondary CPU can enter this func - * twice. 1 - using IPI, and 2. soft-reset. - * Tell the kexec CPU that entered via soft-reset and ready - * to go down. - */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) { - cpumask_clear_cpu(cpu, &cpus_in_sr); - atomic_inc(&enter_on_soft_reset); - } - /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. - * If not, soft-reset will be invoked to bring other CPUs. */ while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) cpu_relax(); @@ -103,25 +86,14 @@ void crash_ipi_callback(struct pt_regs *regs) /* NOTREACHED */ } -/* - * Wait until all CPUs are entered via soft-reset. - */ -static void crash_soft_reset_check(int cpu) -{ - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - - cpumask_clear_cpu(cpu, &cpus_in_sr); - while (atomic_read(&enter_on_soft_reset) != ncpus) - cpu_relax(); -} - - static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + printk(KERN_EMERG "Sending IPI to other CPUs\n"); + crash_send_ipi(crash_ipi_callback); smp_wmb(); @@ -131,7 +103,6 @@ static void crash_kexec_prepare_cpus(int cpu) * respond. * Delay of at least 10 seconds. 
*/ - printk(KERN_EMERG "Sending IPI to other cpus...\n"); msecs = 10000; while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { cpu_relax(); @@ -140,69 +111,36 @@ static void crash_kexec_prepare_cpus(int cpu) /* Would it be better to replace the trap vector here? */ - /* - * FIXME: In case if we do not get all CPUs, one possibility: ask the - * user to do soft reset such that we get all. - * Soft-reset will be used until better mechanism is implemented. - */ if (cpumask_weight(&cpus_in_crash) < ncpus) { - printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", + printk(KERN_EMERG "ERROR: %d CPU(s) not responding\n", ncpus - cpumask_weight(&cpus_in_crash)); - printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); - cpumask_clear(&cpus_in_sr); - atomic_set(&enter_on_soft_reset, 0); - while (cpumask_weight(&cpus_in_crash) < ncpus) - cpu_relax(); } - /* - * Make sure all CPUs are entered via soft-reset if the kdump is - * invoked using soft-reset. - */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) - crash_soft_reset_check(cpu); - /* Leave the IPI callback set */ + + printk(KERN_EMERG "IPI complete\n"); } /* - * This function will be called by secondary cpus or by kexec cpu - * if soft-reset is activated to stop some CPUs. + * This function will be called by secondary cpus. */ void crash_kexec_secondary(struct pt_regs *regs) { - int cpu = smp_processor_id(); unsigned long flags; - int msecs = 5; + int msecs = 500; local_irq_save(flags); - /* Wait 5ms if the kexec CPU is not entered yet. */ + + /* Wait 500ms for the primary crash CPU to signal its progress */ while (crashing_cpu < 0) { if (--msecs < 0) { - /* - * Either kdump image is not loaded or - * kdump process is not started - Probably xmon - * exited using 'x'(exit and recover) or - * kexec_should_crash() failed for all running tasks. - */ - cpumask_clear_cpu(cpu, &cpus_in_sr); + /* No response, kdump image may not have been loaded */ local_irq_restore(flags); return; } + mdelay(1); cpu_relax(); } - if (cpu == crashing_cpu) { - /* - * Panic CPU will enter this func only via soft-reset. - * Wait until all secondary CPUs entered and - * then start kexec boot. - */ - crash_soft_reset_check(cpu); - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 0); - machine_kexec(kexec_crash_image); - /* NOTREACHED */ - } + crash_ipi_callback(regs); } @@ -225,7 +163,6 @@ static void crash_kexec_prepare_cpus(int cpu) void crash_kexec_secondary(struct pt_regs *regs) { - cpumask_clear(&cpus_in_sr); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 91377870b26a..014f88f03d3f 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -162,10 +162,20 @@ int die(const char *str, struct pt_regs *regs, long err) printk("\n"); raw_spin_unlock_irqrestore(&die.lock, flags); - if (kexec_should_crash(current) || - kexec_sr_activated(smp_processor_id())) + /* + * A system reset (0x100) is a request to dump, so we always send + * it through the crashdump code. + */ + if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) { crash_kexec(regs); - crash_kexec_secondary(regs); + + /* + * We aren't the primary crash CPU. We need to send it + * to a holding pattern to avoid it ending up in the panic + * code. 
+ */ + crash_kexec_secondary(regs); + } /* * While our oops output is serialised by a spinlock, output @@ -232,25 +242,8 @@ void system_reset_exception(struct pt_regs *regs) return; } -#ifdef CONFIG_KEXEC - cpumask_set_cpu(smp_processor_id(), &cpus_in_sr); -#endif - die("System Reset", regs, SIGABRT); - /* - * Some CPUs when released from the debugger will execute this path. - * These CPUs entered the debugger via a soft-reset. If the CPU was - * hung before entering the debugger it will return to the hung - * state when exiting this function. This causes a problem in - * kdump since the hung CPU(s) will not respond to the IPI sent - * from kdump. To prevent the problem we call crash_kexec_secondary() - * here. If a kdump had not been initiated or we exit the debugger - * with the "exit and recover" command (x) crash_kexec_secondary() - * will return after 5ms and the CPU returns to its previous state. - */ - crash_kexec_secondary(regs); - /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable System Reset"); -- cgit v1.2.2 From 07fe0c6132578186773e01ffb0f63ded222effe7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:11 +0000 Subject: powerpc/kdump: Use setjmp/longjmp to handle kdump and system reset recursion We can handle recursion caused by system reset by reusing the crash shutdown fault handler. Since we don't have an OS triggerable NMI, if all CPUs don't make it into kdump then we tell the user to issue a system reset. However if we have a panic timeout set we cannot wait forever and must continue the kdump. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 72 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 3d87b205d5f5..a8b6e2d705a4 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -53,6 +53,16 @@ static cpumask_t cpus_in_crash = CPU_MASK_NONE; static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); +static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; + +static int handle_fault(struct pt_regs *regs) +{ + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); + return 0; +} + #ifdef CONFIG_SMP void crash_ipi_callback(struct pt_regs *regs) @@ -89,14 +99,16 @@ void crash_ipi_callback(struct pt_regs *regs) static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + int tries = 0; + int (*old_handler)(struct pt_regs *regs); printk(KERN_EMERG "Sending IPI to other CPUs\n"); crash_send_ipi(crash_ipi_callback); smp_wmb(); +again: /* * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to @@ -111,12 +123,52 @@ static void crash_kexec_prepare_cpus(int cpu) /* Would it be better to replace the trap vector here? 
*/ - if (cpumask_weight(&cpus_in_crash) < ncpus) { - printk(KERN_EMERG "ERROR: %d CPU(s) not responding\n", - ncpus - cpumask_weight(&cpus_in_crash)); + if (cpumask_weight(&cpus_in_crash) >= ncpus) { + printk(KERN_EMERG "IPI complete\n"); + return; + } + + printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", + ncpus - cpumask_weight(&cpus_in_crash)); + + /* + * If we have a panic timeout set then we can't wait indefinitely + * for someone to activate system reset. We also give up on the + * second time through if system reset fail to work. + */ + if ((panic_timeout > 0) || (tries > 0)) + return; + + /* + * A system reset will cause all CPUs to take an 0x100 exception. + * The primary CPU returns here via setjmp, and the secondary + * CPUs reexecute the crash_kexec_secondary path. + */ + old_handler = __debugger; + __debugger = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + + if (setjmp(crash_shutdown_buf) == 0) { + printk(KERN_EMERG "Activate system reset (dumprestart) " + "to stop other cpu(s)\n"); + + /* + * A system reset will force all CPUs to execute the + * crash code again. We need to reset cpus_in_crash so we + * wait for everyone to do this. + */ + cpus_in_crash = CPU_MASK_NONE; + smp_mb(); + + while (cpumask_weight(&cpus_in_crash) < ncpus) + cpu_relax(); } - printk(KERN_EMERG "IPI complete\n"); + crash_shutdown_cpu = -1; + __debugger = old_handler; + + tries++; + goto again; } /* @@ -245,16 +297,6 @@ int crash_shutdown_unregister(crash_shutdown_t handler) } EXPORT_SYMBOL(crash_shutdown_unregister); -static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; -static int crash_shutdown_cpu = -1; - -static int handle_fault(struct pt_regs *regs) -{ - if (crash_shutdown_cpu == smp_processor_id()) - longjmp(crash_shutdown_buf, 1); - return 0; -} - void default_machine_crash_shutdown(struct pt_regs *regs) { unsigned int i; -- cgit v1.2.2 From 8c27474a252e84e8a71ae4a43e18f0193a08e3f7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:12 +0000 Subject: powerpc: Cleanup crash/kexec code Remove some unnecessary defines and fix some spelling mistakes. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index a8b6e2d705a4..d4467557b00e 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -10,41 +10,27 @@ * */ -#undef DEBUG - #include #include #include #include -#include #include #include #include -#include -#include #include #include #include -#include #include #include #include #include #include -#include #include #include #include -#ifdef DEBUG -#include -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -/* This keeps a track of which one is crashing cpu. */ +/* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; @@ -201,7 +187,7 @@ void crash_kexec_secondary(struct pt_regs *regs) static void crash_kexec_prepare_cpus(int cpu) { /* - * move the secondarys to us so that we can copy + * move the secondaries to us so that we can copy * the new kernel 0-0x100 safely * * do this if kexec in setup.c ? 
@@ -302,7 +288,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs) unsigned int i; int (*old_handler)(struct pt_regs *regs); - /* * This function is only called after the system * has panicked or is otherwise in a critical state. @@ -328,7 +313,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) machine_kexec_mask_interrupts(); /* - * Call registered shutdown routines savely. Swap out + * Call registered shutdown routines safely. Swap out * __debugger_fault_handler, and replace on exit. */ old_handler = __debugger_fault_handler; -- cgit v1.2.2 From 760ca4dc90e624eb8f7ff85a5925151e25577758 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:13 +0000 Subject: powerpc: Rework die() Our die() code was based off a very old x86 version. Update it to mirror the current x86 code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/traps.c | 128 +++++++++++++++++++++++++------------------- 1 file changed, 73 insertions(+), 55 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 014f88f03d3f..c091527efd89 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -98,18 +98,14 @@ static void pmac_backlight_unblank(void) static inline void pmac_backlight_unblank(void) { } #endif -int die(const char *str, struct pt_regs *regs, long err) +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; +static int die_counter; + +static unsigned __kprobes long oops_begin(struct pt_regs *regs) { - static struct { - raw_spinlock_t lock; - u32 lock_owner; - int lock_owner_depth; - } die = { - .lock = __RAW_SPIN_LOCK_UNLOCKED(die.lock), - .lock_owner = -1, - .lock_owner_depth = 0 - }; - static int die_counter; + int cpu; unsigned long flags; if (debugger(regs)) @@ -117,50 +113,37 @@ int die(const char *str, struct pt_regs *regs, long err) oops_enter(); - if (die.lock_owner != raw_smp_processor_id()) { - console_verbose(); - raw_spin_lock_irqsave(&die.lock, flags); - die.lock_owner = smp_processor_id(); - die.lock_owner_depth = 0; - bust_spinlocks(1); - if (machine_is(powermac)) - pmac_backlight_unblank(); - } else { - local_save_flags(flags); - } - - if (++die.lock_owner_depth < 3) { - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); -#endif - printk("%s\n", ppc_md.name ? ppc_md.name : ""); - - if (notify_die(DIE_OOPS, str, regs, err, 255, - SIGSEGV) == NOTIFY_STOP) - return 1; - - print_modules(); - show_regs(regs); - } else { - printk("Recursive die() failure, output suppressed\n"); + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. 
should stop eventually */; + else + arch_spin_lock(&die_lock); } + die_nest_count++; + die_owner = cpu; + console_verbose(); + bust_spinlocks(1); + if (machine_is(powermac)) + pmac_backlight_unblank(); + return flags; +} +static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, + int signr) +{ bust_spinlocks(0); - die.lock_owner = -1; + die_owner = -1; add_taint(TAINT_DIE); + die_nest_count--; oops_exit(); printk("\n"); - raw_spin_unlock_irqrestore(&die.lock, flags); + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); /* * A system reset (0x100) is a request to dump, so we always send @@ -177,6 +160,9 @@ int die(const char *str, struct pt_regs *regs, long err) crash_kexec_secondary(regs); } + if (!signr) + return; + /* * While our oops output is serialised by a spinlock, output * from panic() called below can race and corrupt it. If we @@ -190,15 +176,46 @@ int die(const char *str, struct pt_regs *regs, long err) if (in_interrupt()) panic("Fatal exception in interrupt"); - if (panic_on_oops) panic("Fatal exception"); + do_exit(signr); +} - do_exit(err); +static int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); +#endif + printk("%s\n", ppc_md.name ? ppc_md.name : ""); + + if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) + return 1; + + print_modules(); + show_regs(regs); return 0; } +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(regs); + + if (__die(str, regs, err)) + err = 0; + oops_end(flags, regs, err); +} + void user_single_step_siginfo(struct task_struct *tsk, struct pt_regs *regs, siginfo_t *info) { @@ -217,10 +234,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) "at %016lx nip %016lx lr %016lx code %x\n"; if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } else if (show_unhandled_signals && - unhandled_signal(current, signr)) { + die("Exception in kernel mode", regs, signr); + return; + } + + if (show_unhandled_signals && unhandled_signal(current, signr)) { printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, signr, addr, regs->nip, regs->link, code); -- cgit v1.2.2 From 549e88a134b3b393a4312e8d76628b9260eee57f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:16 +0000 Subject: powerpc/kdump: Delay before sending IPI on a system reset If we enter the kdump code via system reset, wait a bit before sending the IPI to capture all secondary CPUs. Without it we race with the hypervisor that is issuing the system reset to each CPU. If the IPI gets there first the system reset oops output then shows the register state of the IPI handler which is not what we want. I took the opportunity to add defines for all the various delays we have. There's no need for cpu_relax when we are doing an mdelay, so remove them too. 
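A rough sketch of the pattern this change describes (a named delay for the system-reset entry path, and mdelay()-based polling with no cpu_relax()) follows; the helper name and its arguments are illustrative, while the real constants and call sites appear in the diff below.

#include <linux/delay.h>
#include <linux/cpumask.h>

#define PRIMARY_TIMEOUT	500	/* ms: let secondaries enter on their own first */
#define IPI_TIMEOUT	10000	/* ms: how long to wait after sending the IPI */

/* Illustrative helper: poll until ncpus CPUs have checked in, or time out. */
static void wait_for_secondaries(const cpumask_t *in_crash, int ncpus)
{
	int msecs = IPI_TIMEOUT;

	/* mdelay() already burns a millisecond per pass; cpu_relax() adds nothing */
	while (cpumask_weight(in_crash) < ncpus && --msecs > 0)
		mdelay(1);
}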
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index d4467557b00e..b942980e9650 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -30,6 +30,20 @@ #include #include +/* + * The primary CPU waits a while for all secondary CPUs to enter. This is to + * avoid sending an IPI if the secondary CPUs are entering + * crash_kexec_secondary on their own (eg via a system reset). + * + * The secondary timeout has to be longer than the primary. Both timeouts are + * in milliseconds. + */ +#define PRIMARY_TIMEOUT 500 +#define SECONDARY_TIMEOUT 1000 + +#define IPI_TIMEOUT 10000 +#define REAL_MODE_TIMEOUT 10000 + /* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; @@ -99,11 +113,9 @@ again: * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to * respond. - * Delay of at least 10 seconds. */ - msecs = 10000; + msecs = IPI_TIMEOUT; while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { - cpu_relax(); mdelay(1); } @@ -163,11 +175,11 @@ again: void crash_kexec_secondary(struct pt_regs *regs) { unsigned long flags; - int msecs = 500; + int msecs = SECONDARY_TIMEOUT; local_irq_save(flags); - /* Wait 500ms for the primary crash CPU to signal its progress */ + /* Wait for the primary crash CPU to signal its progress */ while (crashing_cpu < 0) { if (--msecs < 0) { /* No response, kdump image may not have been loaded */ @@ -176,7 +188,6 @@ void crash_kexec_secondary(struct pt_regs *regs) } mdelay(1); - cpu_relax(); } crash_ipi_callback(regs); @@ -211,7 +222,7 @@ static void crash_kexec_wait_realmode(int cpu) unsigned int msecs; int i; - msecs = 10000; + msecs = REAL_MODE_TIMEOUT; for (i=0; i < nr_cpu_ids && msecs > 0; i++) { if (i == cpu) continue; @@ -306,6 +317,14 @@ void default_machine_crash_shutdown(struct pt_regs *regs) */ crashing_cpu = smp_processor_id(); crash_save_cpu(regs, crashing_cpu); + + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(regs) == 0x100) + mdelay(PRIMARY_TIMEOUT); + crash_kexec_prepare_cpus(crashing_cpu); cpumask_set_cpu(crashing_cpu, &cpus_in_crash); crash_kexec_wait_realmode(crashing_cpu); -- cgit v1.2.2
From 2440c01e10f07adcbc2094ba12ae4ad6094bd2b6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:17 +0000 Subject: powerpc/kdump: Only save CPU state first time through the secondary CPU capture code We might enter the secondary CPU capture code twice, e.g. if we have to unstick some CPUs with a system reset. In this case we don't want to overwrite the state on CPUs that had made it into the capture code OK, so use the cpus_state_saved cpumask for that and make it local to crash_ipi_callback. To control progress, use atomic_t cpus_in_crash to count how many CPUs have made it into the kdump code, and time_to_dump to tell everyone it's time to dump.
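A minimal sketch of the save-once rendezvous described above, reusing the names introduced by this patch; capture_secondary() is an illustrative stand-in for crash_ipi_callback():

#include <linux/kexec.h>
#include <linux/atomic.h>
#include <linux/cpumask.h>
#include <asm/ptrace.h>
#include <asm/processor.h>

static cpumask_t cpus_state_saved;	/* CPUs that already saved their registers */
static atomic_t cpus_in_crash;		/* arrivals, counting repeat passes too */
static int time_to_dump;		/* set by the crashing CPU when all are in */

/* May run twice on one CPU: once via IPI, once again via system reset. */
static void capture_secondary(struct pt_regs *regs, int cpu)
{
	/* Save register state only on the first pass through here */
	if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
		crash_save_cpu(regs, cpu);
		cpumask_set_cpu(cpu, &cpus_state_saved);
	}

	/* Count every arrival so the primary can tell when everyone is in */
	atomic_inc(&cpus_in_crash);

	/* Hold the CPU here until the crashing CPU starts the dump */
	while (!time_to_dump)
		cpu_relax();
}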
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index b942980e9650..28be3452e67a 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -46,7 +46,8 @@ /* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; -static cpumask_t cpus_in_crash = CPU_MASK_NONE; +static atomic_t cpus_in_crash; +static int time_to_dump; #define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ @@ -67,21 +68,27 @@ static int handle_fault(struct pt_regs *regs) void crash_ipi_callback(struct pt_regs *regs) { + static cpumask_t cpus_state_saved = CPU_MASK_NONE; + int cpu = smp_processor_id(); if (!cpu_online(cpu)) return; hard_irq_disable(); - if (!cpumask_test_cpu(cpu, &cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { crash_save_cpu(regs, cpu); - cpumask_set_cpu(cpu, &cpus_in_crash); + cpumask_set_cpu(cpu, &cpus_state_saved); + } + + atomic_inc(&cpus_in_crash); + smp_mb__after_atomic_inc(); /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. */ - while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) + while (!time_to_dump) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -115,19 +122,18 @@ again: * respond. */ msecs = IPI_TIMEOUT; - while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) + while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) mdelay(1); /* Would it be better to replace the trap vector here? */ - if (cpumask_weight(&cpus_in_crash) >= ncpus) { + if (atomic_read(&cpus_in_crash) >= ncpus) { printk(KERN_EMERG "IPI complete\n"); return; } printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", - ncpus - cpumask_weight(&cpus_in_crash)); + ncpus - atomic_read(&cpus_in_crash)); /* * If we have a panic timeout set then we can't wait indefinitely @@ -155,10 +161,10 @@ again: * crash code again. We need to reset cpus_in_crash so we * wait for everyone to do this. */ - cpus_in_crash = CPU_MASK_NONE; + atomic_set(&cpus_in_crash, 0); smp_mb(); - while (cpumask_weight(&cpus_in_crash) < ncpus) + while (atomic_read(&cpus_in_crash) < ncpus) cpu_relax(); } @@ -316,7 +322,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * such that another IPI will not be sent. */ crashing_cpu = smp_processor_id(); - crash_save_cpu(regs, crashing_cpu); /* * If we came in via system reset, wait a while for the secondary * CPUs to enter. */ if (TRAP(regs) == 0x100) mdelay(PRIMARY_TIMEOUT); crash_kexec_prepare_cpus(crashing_cpu); + + crash_save_cpu(regs, crashing_cpu); + + time_to_dump = 1; + crash_kexec_wait_realmode(crashing_cpu); machine_kexec_mask_interrupts(); -- cgit v1.2.2
From 816cb49a4baf0e4b27730bcc31e5d35a1eadd283 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 29 Nov 2011 15:38:50 +0000 Subject: powerpc/ps3: Fix hcall lv1_get_version_info The lv1_get_version_info hcall takes 2, not 1, output arguments. Adjust the lv1 hcall table and all calls.
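A minimal sketch of a fixed caller, matching the two-output signature given in the Usage line below; the helper and its message are illustrative, not code from the patch:

#include <linux/types.h>
#include <linux/printk.h>
#include <asm/lv1call.h>

static void ps3_print_firmware_version(void)
{
	u64 version, vendor;

	/* Both output arguments must now be supplied, even if only one is used. */
	if (lv1_get_version_info(&version, &vendor) == 0)
		pr_info("ps3 firmware version %llx, vendor %llx\n",
			version, vendor);
}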
Usage: int lv1_get_version_info(u64 *version_number, u64 *vendor_id) Signed-off-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2ff4f5e59620..701d4aceb4f4 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -186,8 +186,8 @@ notrace void arch_local_irq_restore(unsigned long en) * Any HV call will have this side effect. */ if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp; - lv1_get_version_info(&tmp); + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); } __hard_irq_enable(); -- cgit v1.2.2 From 33392640424ff775e7d82eab4a51af7b8cc9384d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 4 Dec 2011 13:13:58 +0000 Subject: powerpc/pseries: Increase minimum RMO size from 64MB to 256MB The minimum RMO size field in ibm,client-architecture is currently ignored, but a future firmware version will rectify that. Since we always get at least 128MB of RMO right now, asking for 64MB is likely to result in boot failures. We should bump it to at least 128MB, but considering all the boot issues we have on 128MB RMO boxes and all new machines have virtual RMO, we may as well set our minimum to 256MB. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index f4f9f2f14322..df47316f1aee 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -742,7 +742,7 @@ static unsigned char ibm_architecture_vec[] = { W(0xffffffff), /* virt_base */ W(0xffffffff), /* virt_size */ W(0xffffffff), /* load_base */ - W(64), /* 64MB min RMA */ + W(256), /* 256MB min RMA */ W(0xffffffff), /* full client load */ 0, /* min RMA percentage of total RAM */ 48, /* max log_2(hash table size) */ -- cgit v1.2.2 From cba313da5c8ddbe6bba74c9f2b8f6d9e0bc0e723 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 5 Dec 2011 19:35:32 +0000 Subject: powerpc/powernv: Fix problems in onlining CPUs At present, on the powernv platform, if you off-line a CPU that was online, and then try to on-line it again, the kernel generates a warning message "OPAL Error -1 starting CPU n". Furthermore, if the CPU is a secondary thread that was used by KVM while it was off-line, the CPU fails to come online. The first problem is fixed by only calling OPAL to start the CPU the first time it is on-lined, as indicated by the cpu_start field of its PACA being zero. The second problem is fixed by restoring the cpu_start field to 1 instead of 0 when using the CPU within KVM. 
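A rough sketch of the first fix, gating the OPAL start call on the PACA cpu_start field; this helper and its flow are a simplified illustration (assuming opal_start_cpu() reports OPAL_SUCCESS on success), not the actual platform code:

#include <linux/printk.h>
#include <asm/paca.h>
#include <asm/opal.h>
#include <asm/smp.h>

static int kick_cpu_sketch(int nr, unsigned long start_here)
{
	long rc;

	/* A CPU that was on-lined before is already spinning in the kernel;
	 * it only needs cpu_start set again, not another OPAL start call. */
	if (!paca[nr].cpu_start) {
		rc = opal_start_cpu(get_hard_smp_processor_id(nr), start_here);
		if (rc != OPAL_SUCCESS) {
			pr_warn("OPAL Error %ld starting CPU %d\n", rc, nr);
			return -1;
		}
	}

	/* Release the CPU into the secondary startup path */
	paca[nr].cpu_start = 1;
	smp_mb();
	return 0;
}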
Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cf9c69b9189c..d4be7bb3dbdf 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -65,7 +65,7 @@ BEGIN_FTR_SECTION lbz r0,PACAPROCSTART(r13) cmpwi r0,0x80 bne 1f - li r0,0 + li r0,1 stb r0,PACAPROCSTART(r13) b kvm_start_guest 1: -- cgit v1.2.2 From 2fde6d20bb75b53f1ead383b4713f95d0d6d9f59 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 5 Dec 2011 19:47:26 +0000 Subject: powerpc: Provide a way for KVM to indicate that NV GPR values are lost This fixes a problem where a CPU thread coming out of nap mode can think it has valid values in the nonvolatile GPRs (r14 - r31) as saved away in power7_idle, but in fact the values have been trashed because the thread was used for KVM in the mean time. The result is that the thread crashes because code that called power7_idle (e.g., pnv_smp_cpu_kill_self()) goes to use values in registers that have been trashed. The bit field in SRR1 that tells whether state was lost only reflects the most recent nap, which may not have been the nap instruction in power7_idle. So we need an extra PACA field to indicate that state has been lost even if SRR1 indicates that the most recent nap didn't lose state. We clear this field when saving the state in power7_idle, we set it to a non-zero value when we use the thread for KVM, and we test it in power7_wakeup_noloss. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/idle_power7.S | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7c5324f1ec9c..04caee7d9bc1 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -208,6 +208,7 @@ int main(void) DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); + DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); #endif /* CONFIG_PPC64 */ /* RTAS */ diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 3a70845a51c7..fcdff198da4b 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -54,6 +54,7 @@ _GLOBAL(power7_idle) li r0,0 stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ stb r0,PACAHARDIRQEN(r13) + stb r0,PACA_NAPSTATELOST(r13) /* Continue saving state */ SAVE_GPR(2, r1) @@ -86,6 +87,9 @@ _GLOBAL(power7_wakeup_loss) rfid _GLOBAL(power7_wakeup_noloss) + lbz r0,PACA_NAPSTATELOST(r13) + cmpwi r0,0 + bne .power7_wakeup_loss ld r1,PACAR1(r13) ld r4,_MSR(r1) ld r5,_NIP(r1) -- cgit v1.2.2 From fe091c208a40299fba40e62292a610fb91e44b4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:07 -0800 Subject: memblock: Kill memblock_init() memblock_init() initializes arrays for regions and memblock itself; however, all these can be done with struct initializers and memblock_init() can be removed. This patch kills memblock_init() and initializes memblock with struct initializer. The only difference is that the first dummy entries don't have .nid set to MAX_NUMNODES initially. 
This doesn't cause any behavior difference. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu Cc: Russell King Cc: Michal Simek Cc: Paul Mundt Cc: "David S. Miller" Cc: Guan Xuetao Cc: "H. Peter Anvin" --- arch/powerpc/kernel/prom.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fa1235b0503b..a7ee83e6eb17 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -733,8 +733,6 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); /* Scan memory nodes and rebuild MEMBLOCKs */ - memblock_init(); - of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); -- cgit v1.2.2
From 6fbef13c4feaf0c5576e2315f4d2999c4b670c88 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:07 -0800 Subject: powerpc: Cleanup memblock usage * early_init_devtree(): Total memory size is aligned to PAGE_SIZE; however, alignment isn't enforced if memory_limit is explicitly specified. Simplify the logic and always apply PAGE_SIZE alignment. * MMU_init(): the memblock regions array is truncated by directly modifying memblock.memory.cnt. This is incomplete (the reserved array is not truncated) and unnecessarily low level, hindering further memblock improvements. Use memblock_enforce_memory_limit() instead. * wii_memory_fixups(): Unnecessarily low-level direct manipulation of memblock regions. The same result can be achieved using properly abstracted operations. Reimplement using the memblock API. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- arch/powerpc/kernel/prom.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a7ee83e6eb17..28500d4f29d9 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -754,17 +754,12 @@ void __init early_init_devtree(void *params) early_reserve_mem(); phyp_dump_reserve_mem(); - limit = memory_limit; - if (! limit) { - phys_addr_t memsize; - - /* Ensure that total memory size is page-aligned, because - * otherwise mark_bootmem() gets upset. */ - memblock_analyze(); - memsize = memblock_phys_mem_size(); - if ((memsize & PAGE_MASK) != memsize) - limit = memsize & PAGE_MASK; - } + /* + * Ensure that total memory size is page-aligned, because otherwise + * mark_bootmem() gets upset. + */ + memblock_analyze(); + limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); memblock_analyze(); -- cgit v1.2.2
From 1aadc0560f46530f8a0f11055285b876a8a31770 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:08 -0800 Subject: memblock: s/memblock_analyze()/memblock_allow_resize()/ and update users The only function of memblock_analyze() is now allowing resize of memblock region arrays. Rename it to memblock_allow_resize() and update its users. * The following users remain the same other than renaming. arm/mm/init.c::arm_memblock_init() microblaze/kernel/prom.c::early_init_devtree() powerpc/kernel/prom.c::early_init_devtree() openrisc/kernel/prom.c::early_init_devtree() sh/mm/init.c::paging_init() sparc/mm/init_64.c::paging_init() unicore32/mm/init.c::uc32_memblock_init() * In the following users, analyze was used to update total size which is no longer necessary.
powerpc/kernel/machine_kexec.c::reserve_crashkernel() powerpc/kernel/prom.c::early_init_devtree() powerpc/mm/init_32.c::MMU_init() powerpc/mm/tlb_nohash.c::__early_init_mmu() powerpc/platforms/ps3/mm.c::ps3_mm_add_memory() powerpc/platforms/embedded6xx/wii.c::wii_memory_fixups() sh/kernel/machine_kexec.c::reserve_crashkernel() * x86/kernel/e820.c::memblock_x86_fill() was directly setting memblock_can_resize before populating memblock and calling analyze afterwards. Call memblock_allow_resize() before starting to populate. memblock_can_resize is now static inside memblock.c. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu Cc: Russell King Cc: Michal Simek Cc: Paul Mundt Cc: "David S. Miller" Cc: Guan Xuetao Cc: "H. Peter Anvin" --- arch/powerpc/kernel/machine_kexec.c | 3 --- arch/powerpc/kernel/prom.c | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 9ce1672afb59..a2158a395d96 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -107,9 +107,6 @@ void __init reserve_crashkernel(void) unsigned long long crash_size, crash_base; int ret; - /* this is necessary because of memblock_phys_mem_size() */ - memblock_analyze(); - /* use common parsing */ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 28500d4f29d9..abe405dab34d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -758,11 +758,10 @@ void __init early_init_devtree(void *params) * Ensure that total memory size is page-aligned, because otherwise * mark_bootmem() gets upset. */ - memblock_analyze(); limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); - memblock_analyze(); + memblock_allow_resize(); memblock_dump_all(); DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); -- cgit v1.2.2
From df777bd39a266637d1765d48043493489418e75b Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Wed, 30 Nov 2011 21:39:23 +0000 Subject: powerpc/476fpe: Add 476fpe SoC code Based on original work by David 'Shaggy' Kleikamp.
Signed-off-by: Tony Breeds Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cputable.c | 14 ++++++++++++++ arch/powerpc/kernel/head_44x.S | 2 ++ 2 files changed, 16 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index ce59693835c7..81db9e2a8a20 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1843,6 +1843,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, + { /* 476fpe */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff50000, + .cpu_name = "476fpe", + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, { /* 476 iss */ .pvr_mask = 0xffff0000, .pvr_value = 0x00050000, diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b725dab0f88a..bb7a9c7a4c05 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -732,6 +732,8 @@ _GLOBAL(init_cpu_state) /* We use the PVR to differenciate 44x cores from 476 */ mfspr r3,SPRN_PVR srwi r3,r3,16 + cmplwi cr0,r3,PVR_476FPE@h + beq head_start_47x cmplwi cr0,r3,PVR_476@h beq head_start_47x cmplwi cr0,r3,PVR_476_ISS@h -- cgit v1.2.2
From 280f06774afedf849f0b34248ed6aff57d0f6908 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Oct 2011 18:22:06 +0200 Subject: nohz: Separate out irq exit and idle loop dyntick logic The tick_nohz_stop_sched_tick() function, which tries to delay the next timer tick as long as possible, can be called from two places: - From the idle loop to start the dyntick idle mode - From interrupt exit if we have interrupted the dyntick idle mode, so that we reprogram the next tick event in case the irq changed some internal state that requires this action. There are only a few minor differences between the two, handled by that function and driven by the ts->inidle cpu variable and the inidle parameter. Together they guarantee that we only update the dyntick mode on irq exit if we actually interrupted the dyntick idle mode, and that we enter the RCU extended quiescent state from idle loop entry only. Split this function into: - tick_nohz_idle_enter(), which sets ts->inidle to 1, enters dynticks idle mode unconditionally if it can, and enters into RCU extended quiescent state. - tick_nohz_irq_exit() which only updates the dynticks idle mode when ts->inidle is set (i.e. if tick_nohz_idle_enter() has been called). To maintain symmetry, tick_nohz_restart_sched_tick() has been renamed into tick_nohz_idle_exit(). This simplifies the code and micro-optimizes the irq exit path (no need for local_irq_save there). This also prepares for the split between dynticks and rcu extended quiescent state logics. We'll need this split to further fix illegal uses of RCU in extended quiescent states in the idle loop. Signed-off-by: Frederic Weisbecker Cc: Mike Frysinger Cc: Guan Xuetao Cc: David Miller Cc: Chris Metcalf Cc: Hans-Christian Egtvedt Cc: Ralf Baechle Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Russell King Cc: Paul Mackerras Cc: Heiko Carstens Cc: Paul Mundt Signed-off-by: Paul E.
McKenney Reviewed-by: Josh Triplett --- arch/powerpc/kernel/idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 39a2baa6ad58..878572f70ac5 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -56,7 +56,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +93,7 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.2
From 2bbb6817c0ac1b5f2a68d720f364f98eeb1ac4fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Oct 2011 16:01:00 +0200 Subject: nohz: Allow rcu extended quiescent state handling separately from tick stop It is assumed that rcu won't be used once we switch to tickless mode and until we restart the tick. However, this is not always true, as on x86-64 where we dereference the idle notifiers after the tick is stopped. To prepare for fixing this, add two new APIs: tick_nohz_idle_enter_norcu() and tick_nohz_idle_exit_norcu(). If no use of RCU is made in the idle loop between the tick_nohz_idle_enter() and tick_nohz_idle_exit() calls, the arch must instead call the new *_norcu() version such that the arch doesn't need to call rcu_idle_enter() and rcu_idle_exit(). Otherwise the arch must call tick_nohz_idle_enter() and tick_nohz_idle_exit() and also call explicitly: - rcu_idle_enter() after its last use of RCU before the CPU is put to sleep. - rcu_idle_exit() before the first use of RCU after the CPU is woken up. Signed-off-by: Frederic Weisbecker Cc: Mike Frysinger Cc: Guan Xuetao Cc: David Miller Cc: Chris Metcalf Cc: Hans-Christian Egtvedt Cc: Ralf Baechle Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Russell King Cc: Paul Mackerras Cc: Heiko Carstens Cc: Paul Mundt Signed-off-by: Paul E. McKenney --- arch/powerpc/kernel/idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 878572f70ac5..2e782a36d8f2 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -56,7 +56,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +93,7 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.2
From a7b152d5342c06e81ab0cf7d18345f69fa7e56b5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 13 Oct 2011 19:05:12 -0700 Subject: powerpc: Tell RCU about idle after hcall tracing The PowerPC pSeries platform (CONFIG_PPC_PSERIES=y) enables hypervisor-call tracing for CONFIG_TRACEPOINTS=y kernels. One of the hypervisor calls that is traced is the H_CEDE call in the idle loop that tells the hypervisor that this OS instance no longer needs the current CPU.
However, tracing uses RCU, so this combination of kernel configuration variables needs to avoid telling RCU about the current CPU's idleness until after the H_CEDE-entry tracing completes, and must tell RCU that the current CPU is no longer idle before the H_CEDE-exit tracing starts. In all other cases, it suffices to inform RCU of CPU idleness upon idle-loop entry and exit. This commit makes the required adjustments. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- arch/powerpc/kernel/idle.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 2e782a36d8f2..3cd73d1fc427 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -46,6 +46,12 @@ static int __init powersave_off(char *arg) } __setup("powersave=off", powersave_off); +#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS) +static const bool idle_uses_rcu = 1; +#else +static const bool idle_uses_rcu; +#endif + /* * The body of the idle task. */ @@ -56,7 +62,10 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_idle_enter_norcu(); + if (idle_uses_rcu) + tick_nohz_idle_enter(); + else + tick_nohz_idle_enter_norcu(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +102,10 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_idle_exit_norcu(); + if (idle_uses_rcu) + tick_nohz_idle_exit(); + else + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.2
From 1268fbc746ea1cd279886a740dcbad4ba5232225 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 17 Nov 2011 18:48:14 +0100 Subject: nohz: Remove tick_nohz_idle_enter_norcu() / tick_nohz_idle_exit_norcu() Those two APIs were provided to optimize the calls of tick_nohz_idle_enter() and rcu_idle_enter() into a single irq-disabled section. This way no interrupt happening in-between would needlessly process any RCU job. Now we are talking about an optimization for which benefits have yet to be measured. Let's start simple and completely decouple the idle rcu and dyntick idle logics. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Reviewed-by: Josh Triplett Signed-off-by: Paul E.
McKenney --- arch/powerpc/kernel/idle.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 3cd73d1fc427..9c3cd490b1bd 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -62,10 +62,10 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - if (idle_uses_rcu) - tick_nohz_idle_enter(); - else - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + if (!idle_uses_rcu) + rcu_idle_enter(); + while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -102,10 +102,9 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - if (idle_uses_rcu) - tick_nohz_idle_exit(); - else - tick_nohz_idle_exit_norcu(); + if (!idle_uses_rcu) + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.2
From 9f5072d4f63f28d30d343573830ac6c85fc0deff Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Fri, 9 Dec 2011 11:35:08 +0000 Subject: powerpc: Fix wrong divisor in usecs_to_cputime Commit d57af9b (taskstats: use real microsecond granularity for CPU times) renamed msecs_to_cputime to usecs_to_cputime, but failed to update all numbers on the way. This causes nonsensical cpu idle/iowait values to be displayed in /proc/stat (the only user of usecs_to_cputime so far). This also renames __cputime_msec_factor to __cputime_usec_factor, adapting its value and using it directly in cputime_to_usecs instead of doing two multiplications. Signed-off-by: Andreas Schwab Acked-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 9754743db8b9..567dd7c3ac2a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -158,13 +158,13 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Factors for converting from cputime_t (timebase ticks) to - * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds). + * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). * These are all stored as 0.64 fixed-point binary fractions. */ u64 __cputime_jiffies_factor; EXPORT_SYMBOL(__cputime_jiffies_factor); -u64 __cputime_msec_factor; -EXPORT_SYMBOL(__cputime_msec_factor); +u64 __cputime_usec_factor; +EXPORT_SYMBOL(__cputime_usec_factor); u64 __cputime_sec_factor; EXPORT_SYMBOL(__cputime_sec_factor); u64 __cputime_clockt_factor; @@ -182,8 +182,8 @@ static void calc_cputime_factors(void) div128_by_32(HZ, 0, tb_ticks_per_sec, &res); __cputime_jiffies_factor = res.result_low; - div128_by_32(1000, 0, tb_ticks_per_sec, &res); - __cputime_msec_factor = res.result_low; + div128_by_32(1000000, 0, tb_ticks_per_sec, &res); + __cputime_usec_factor = res.result_low; div128_by_32(1, 0, tb_ticks_per_sec, &res); __cputime_sec_factor = res.result_low; div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); -- cgit v1.2.2
From 64968f60e73d7b3f9fca1ca5cd985d75b2cbca44 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 13 Dec 2011 17:54:13 +0000 Subject: powerpc: Only use initrd_end as the limit for alloc_bottom if it's inside the RMO. As kernels and initrds get bigger, boot loaders and possibly kexec-tools will need to place the initrd outside the RMO. When this happens we end up with no lowmem and the boot doesn't get very far.
Only use initrd_end as the limit for alloc_bottom if it's inside the RMO. Signed-off-by: Paul Mackerras Signed-off-by: Tony Breeds Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index df47316f1aee..32b5b05082ea 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1224,14 +1224,6 @@ static void __init prom_init_mem(void) RELOC(alloc_bottom) = PAGE_ALIGN((unsigned long)&RELOC(_end) + 0x4000); - /* Check if we have an initrd after the kernel, if we do move our bottom - * point to after it - */ - if (RELOC(prom_initrd_start)) { - if (RELOC(prom_initrd_end) > RELOC(alloc_bottom)) - RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); - } - /* * If prom_memory_limit is set we reduce the upper limits *except* for * alloc_top_high. This must be the real top of RAM so we can put @@ -1269,6 +1261,15 @@ static void __init prom_init_mem(void) RELOC(alloc_top) = RELOC(rmo_top); RELOC(alloc_top_high) = RELOC(ram_top); + /* + * Check if we have an initrd after the kernel but still inside + * the RMO. If we do move our bottom point to after it. + */ + if (RELOC(prom_initrd_start) && + RELOC(prom_initrd_start) < RELOC(rmo_top) && + RELOC(prom_initrd_end) > RELOC(alloc_bottom)) + RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); + prom_printf("memory layout at init:\n"); prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom)); -- cgit v1.2.2
From 3f53638c805f75989f4b4be07efcfd173cdd5e2d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Dec 2011 13:55:11 +0000 Subject: powerpc: Fix old bug in prom_init setting of the color We have an array of 16 entries and a loop of 32 iterations... oops. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 32b5b05082ea..55b4080a821e 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2080,7 +2080,7 @@ static void __init prom_check_displays(void) /* Setup a usable color table when the appropriate * method is available. Should update this to set-colors */ clut = RELOC(default_colors); - for (i = 0; i < 32; i++, clut += 3) + for (i = 0; i < 16; i++, clut += 3) if (prom_set_color(ih, i, clut[0], clut[1], clut[2]) != 0) break; -- cgit v1.2.2
From 0f890c8d205e47f7cb0d381ffba582a170fd4f72 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Wed, 14 Dec 2011 22:57:15 +0000 Subject: powerpc: Rename mapping based RELOCATABLE to DYNAMIC_MEMSTART for BookE The current implementation of CONFIG_RELOCATABLE in BookE is based on mapping the page aligned kernel load address to KERNELBASE. This approach, however, is not enough for platforms where the TLB page size is large (e.g., 256M on 44x). So we are renaming the RELOCATABLE used currently in BookE to DYNAMIC_MEMSTART to reflect the actual method. CONFIG_RELOCATABLE for PPC32 (BookE), based on processing of the dynamic relocations, will be introduced later in the patch series. This change would allow the use of the old method of RELOCATABLE for platforms which can afford to enforce the page alignment (platforms with smaller TLB size).
Changes since v3: * Introduced a new config, NONSTATIC_KERNEL, to denote a kernel which is either RELOCATABLE or DYNAMIC_MEMSTART (suggested by Josh Boyer) Suggested-by: Scott Wood Tested-by: Scott Wood Signed-off-by: Suzuki K. Poulose Cc: Scott Wood Cc: Kumar Gala Cc: Josh Boyer Cc: Benjamin Herrenschmidt Cc: linux ppc dev Signed-off-by: Josh Boyer --- arch/powerpc/kernel/crash_dump.c | 4 ++-- arch/powerpc/kernel/head_44x.S | 4 +++- arch/powerpc/kernel/head_fsl_booke.S | 2 +- arch/powerpc/kernel/machine_kexec.c | 2 +- arch/powerpc/kernel/prom_init.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 424afb6b8fba..b3ba5163eae2 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -28,7 +28,7 @@ #define DBG(fmt...) #endif -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL void __init reserve_kdump_trampoline(void) { memblock_reserve(0, KDUMP_RESERVE_LIMIT); @@ -67,7 +67,7 @@ void __init setup_kdump_trampoline(void) DBG(" <- setup_kdump_trampoline()\n"); } -#endif /* CONFIG_RELOCATABLE */ +#endif /* CONFIG_NONSTATIC_KERNEL */ static int __init parse_savemaxmem(char *p) { diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index bb7a9c7a4c05..d7a1debda10b 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -86,8 +86,10 @@ _ENTRY(_start); bl early_init -#ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_DYNAMIC_MEMSTART /* + * Mapping based, page aligned dynamic kernel loading. + * * r25 will contain RPN/ERPN for the start address of memory * * Add the difference between KERNELBASE and PAGE_OFFSET to the diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 9f5d210ddf3f..d5d78c4ceef6 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -197,7 +197,7 @@ _ENTRY(__early_start) bl early_init -#ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_DYNAMIC_MEMSTART lis r3,kernstart_addr@ha la r3,kernstart_addr@l(r3) #ifdef CONFIG_PHYS_64BIT diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 9ce1672afb59..ec50bb965538 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -128,7 +128,7 @@ void __init reserve_crashkernel(void) crash_size = resource_size(&crashk_res); -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL if (crashk_res.start != KDUMP_KERNELBASE) printk("Crash kernel location must be 0x%x\n", KDUMP_KERNELBASE); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 55b4080a821e..eca626ea3f23 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2845,7 +2845,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, RELOC(of_platform) = prom_find_machine_type(); prom_printf("Detected machine type: %x\n", RELOC(of_platform)); -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL /* Bail if this is a kdump kernel. */ if (PHYSICAL_START > 0) prom_panic("Error: You can't boot a kdump kernel from OF!\n"); -- cgit v1.2.2
From 239132454583d474932d8835f87a244f6f1bff9e Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Wed, 14 Dec 2011 22:57:57 +0000 Subject: powerpc/44x: Enable DYNAMIC_MEMSTART for 440x DYNAMIC_MEMSTART (the old RELOCATABLE) was restricted to the PPC_47x variants of 44x.
This patch enables DYNAMIC_MEMSTART for 440x based chipsets. Signed-off-by: Suzuki K. Poulose Cc: Josh Boyer Cc: Kumar Gala Cc: Benjamin Herrenschmidt Cc: linux ppc dev Signed-off-by: Josh Boyer --- arch/powerpc/kernel/head_44x.S | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index d7a1debda10b..0878bf5d8a68 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -804,12 +804,24 @@ skpinv: addi r4,r4,1 /* Increment */ /* * Configure and load pinned entry into TLB slot 63. */ +#ifdef CONFIG_DYNAMIC_MEMSTART + + /* Read the XLAT entry for our current mapping */ + tlbre r25,r23,PPC44x_TLB_XLAT + + lis r3,KERNELBASE@h + ori r3,r3,KERNELBASE@l + + /* Use our current RPN entry */ + mr r4,r25 +#else lis r3,PAGE_OFFSET@h ori r3,r3,PAGE_OFFSET@l /* Kernel is at the base of RAM */ li r4, 0 /* Load the kernel physical address */ +#endif /* Load the kernel PID = 0 */ li r0,0 -- cgit v1.2.2 From 9c5f7d39a86316cd13baf973c90ed27f9f1cc979 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Wed, 14 Dec 2011 22:58:12 +0000 Subject: powerpc: Process dynamic relocations for kernel The following patch implements the dynamic relocation processing for the PPC32 kernel. relocate() accepts the target virtual address and relocates the kernel image to that address. Currently the following relocation types are handled: R_PPC_RELATIVE R_PPC_ADDR16_LO R_PPC_ADDR16_HI R_PPC_ADDR16_HA The last 3 relocation types in the above list depend on the value of a symbol whose index is encoded in the relocation entry. Hence we need the symbol table for processing such relocations. Note: The GNU ld for ppc32 produces buggy relocations for relocation types that depend on symbols. The value of the symbols with STB_LOCAL scope should be assumed to be zero. - Alan Modra Signed-off-by: Suzuki K. Poulose Signed-off-by: Josh Poimboeuf Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Alan Modra Cc: Kumar Gala Cc: linuxppc-dev Signed-off-by: Josh Boyer --- arch/powerpc/kernel/Makefile | 2 + arch/powerpc/kernel/reloc_32.S | 208 ++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 8 +- 3 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/reloc_32.S (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ce4f7f179117..ee728e433aa2 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -85,6 +85,8 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds +obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o + obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S new file mode 100644 index 000000000000..ef46ba6e094f --- /dev/null +++ b/arch/powerpc/kernel/reloc_32.S @@ -0,0 +1,208 @@ +/* + * Code to process dynamic relocations for PPC32. + * + * Copyrights (C) IBM Corporation, 2011. + * Author: Suzuki Poulose + * + * - Based on ppc64 code - reloc_64.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
+ */ + +#include + +/* Dynamic section table entry tags */ +DT_RELA = 7 /* Tag for Elf32_Rela section */ +DT_RELASZ = 8 /* Size of the Rela relocs */ +DT_RELAENT = 9 /* Size of one Rela reloc entry */ + +STN_UNDEF = 0 /* Undefined symbol index */ +STB_LOCAL = 0 /* Local binding for the symbol */ + +R_PPC_ADDR16_LO = 4 /* Lower half of (S+A) */ +R_PPC_ADDR16_HI = 5 /* Upper half of (S+A) */ +R_PPC_ADDR16_HA = 6 /* High Adjusted (S+A) */ +R_PPC_RELATIVE = 22 + +/* + * r3 = desired final address + */ + +_GLOBAL(relocate) + + mflr r0 /* Save our LR */ + bl 0f /* Find our current runtime address */ +0: mflr r12 /* Make it accessible */ + mtlr r0 + + lwz r11, (p_dyn - 0b)(r12) + add r11, r11, r12 /* runtime address of .dynamic section */ + lwz r9, (p_rela - 0b)(r12) + add r9, r9, r12 /* runtime address of .rela.dyn section */ + lwz r10, (p_st - 0b)(r12) + add r10, r10, r12 /* runtime address of _stext section */ + lwz r13, (p_sym - 0b)(r12) + add r13, r13, r12 /* runtime address of .dynsym section */ + + /* + * Scan the dynamic section for RELA, RELASZ entries + */ + li r6, 0 + li r7, 0 + li r8, 0 +1: lwz r5, 0(r11) /* ELF_Dyn.d_tag */ + cmpwi r5, 0 /* End of ELF_Dyn[] */ + beq eodyn + cmpwi r5, DT_RELA + bne relasz + lwz r7, 4(r11) /* r7 = rela.link */ + b skip +relasz: + cmpwi r5, DT_RELASZ + bne relaent + lwz r8, 4(r11) /* r8 = Total Rela relocs size */ + b skip +relaent: + cmpwi r5, DT_RELAENT + bne skip + lwz r6, 4(r11) /* r6 = Size of one Rela reloc */ +skip: + addi r11, r11, 8 + b 1b +eodyn: /* End of Dyn Table scan */ + + /* Check if we have found all the entries */ + cmpwi r7, 0 + beq done + cmpwi r8, 0 + beq done + cmpwi r6, 0 + beq done + + + /* + * Work out the current offset from the link time address of .rela + * section. + * cur_offset[r7] = rela.run[r9] - rela.link [r7] + * _stext.link[r12] = _stext.run[r10] - cur_offset[r7] + * final_offset[r3] = _stext.final[r3] - _stext.link[r12] + */ + subf r7, r7, r9 /* cur_offset */ + subf r12, r7, r10 + subf r3, r12, r3 /* final_offset */ + + subf r8, r6, r8 /* relaz -= relaent */ + /* + * Scan through the .rela table and process each entry + * r9 - points to the current .rela table entry + * r13 - points to the symbol table + */ + + /* + * Check if we have a relocation based on symbol + * r5 will hold the value of the symbol. + */ +applyrela: + lwz r4, 4(r9) /* r4 = rela.r_info */ + srwi r5, r4, 8 /* ELF32_R_SYM(r_info) */ + cmpwi r5, STN_UNDEF /* sym == STN_UNDEF ? */ + beq get_type /* value = 0 */ + /* Find the value of the symbol at index(r5) */ + slwi r5, r5, 4 /* r5 = r5 * sizeof(Elf32_Sym) */ + add r12, r13, r5 /* r12 = &__dyn_sym[Index] */ + + /* + * GNU ld has a bug, where dynamic relocs based on + * STB_LOCAL symbols, the value should be assumed + * to be zero. - Alan Modra + */ + /* XXX: Do we need to check if we are using GNU ld ? 
*/ + lbz r5, 12(r12) /* r5 = dyn_sym[Index].st_info */ + extrwi r5, r5, 4, 24 /* r5 = ELF32_ST_BIND(r5) */ + cmpwi r5, STB_LOCAL /* st_value = 0, ld bug */ + beq get_type /* We have r5 = 0 */ + lwz r5, 4(r12) /* r5 = __dyn_sym[Index].st_value */ + +get_type: + /* Load the relocation type to r4 */ + extrwi r4, r4, 8, 24 /* r4 = ELF32_R_TYPE(r_info) = ((char*)r4)[3] */ + + /* R_PPC_RELATIVE */ + cmpwi r4, R_PPC_RELATIVE + bne hi16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 /* final addend */ + stwx r0, r4, r7 /* memory[r4+r7]) = (u32)r0 */ + b nxtrela /* continue */ + + /* R_PPC_ADDR16_HI */ +hi16: + cmpwi r4, R_PPC_ADDR16_HI + bne ha16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */ + b store_half + + /* R_PPC_ADDR16_HA */ +ha16: + cmpwi r4, R_PPC_ADDR16_HA + bne lo16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r5, r0, 1, 16 /* Extract bit 16 */ + extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */ + add r0, r0, r5 /* Add it to r0 */ + b store_half + + /* R_PPC_ADDR16_LO */ +lo16: + cmpwi r4, R_PPC_ADDR16_LO + bne nxtrela + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r0, r0, 16, 16 /* r0 &= 0xffff */ + /* Fall through to */ + + /* Store half word */ +store_half: + sthx r0, r4, r7 /* memory[r4+r7] = (u16)r0 */ + +nxtrela: + /* + * We have to flush the modified instructions to the + * main storage from the d-cache. And also, invalidate the + * cached instructions in i-cache which has been modified. + * + * We delay the sync / isync operation till the end, since + * we won't be executing the modified instructions until + * we return from here. + */ + dcbst r4,r7 + sync /* Ensure the data is flushed before icbi */ + icbi r4,r7 + cmpwi r8, 0 /* relasz = 0 ? */ + ble done + add r9, r9, r6 /* move to next entry in the .rela table */ + subf r8, r6, r8 /* relasz -= relaent */ + b applyrela + +done: + sync /* Wait for the flush to finish */ + isync /* Discard prefetched instructions */ + blr + +p_dyn: .long __dynamic_start - 0b +p_rela: .long __rela_dyn_start - 0b +p_sym: .long __dynamic_symtab - 0b +p_st: .long _stext - 0b diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 920276c0f6a1..710a54005dfb 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -170,7 +170,13 @@ SECTIONS } #ifdef CONFIG_RELOCATABLE . = ALIGN(8); - .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { *(.dynsym) } + .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) + { +#ifdef CONFIG_RELOCATABLE_PPC32 + __dynamic_symtab = .; +#endif + *(.dynsym) + } .dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) } .dynamic : AT(ADDR(.dynamic) - LOAD_OFFSET) { -- cgit v1.2.2 From 26ecb6c44bb33afc62905ba01b636dde70fc2dc6 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Wed, 14 Dec 2011 22:59:24 +0000 Subject: powerpc/44x: Enable CONFIG_RELOCATABLE for PPC44x The following patch adds relocatable kernel support - based on processing of dynamic relocations - for PPC44x kernel. We find the runtime address of _stext and relocate ourselves based on the following calculation. virtual_base = ALIGN(KERNELBASE,256M) + MODULO(_stext.run,256M) relocate() is called with the Effective Virtual Base Address (as shown below):

              | Phys. Addr| Virt. Addr |
 Page (256M)  |------------------------|
 Boundary     |           |            |
              |           |            |
              |           |            |
 Kernel Load  |___________|_ __ _ _ _ _|<- Effective
 Addr(_stext) |           |     ^      |   Virt. Base Addr
              |           |     |      |
              |           |     |      |
              |           |reloc_offset|
              |           |     |      |
              |           |     |      |
              |           |______v_____|<-(KERNELBASE)%256M
              |           |            |
              |           |            |
              |           |            |
 Page (256M)  |-----------|------------|
 Boundary     |           |            |

The virt_phys_offset is updated accordingly, i.e., virt_phys_offset = effective kernel virt base - kernstart_addr. I have tested the patches on 440x platforms only. However this should work fine for PPC_47x also, as we only depend on the runtime address and the current TLB XLAT entry for the startup code, which is available in r25. I don't have access to a 47x board yet. So, it would be great if somebody could test this on 47x. Signed-off-by: Suzuki K. Poulose Cc: Benjamin Herrenschmidt Cc: Kumar Gala Cc: Tony Breeds Cc: Josh Boyer Cc: linuxppc-dev Signed-off-by: Josh Boyer --- arch/powerpc/kernel/head_44x.S | 95 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 0878bf5d8a68..7dd2981bcc50 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -64,6 +64,35 @@ _ENTRY(_start); mr r31,r3 /* save device tree ptr */ li r24,0 /* CPU number */ +#ifdef CONFIG_RELOCATABLE +/* + * Relocate ourselves to the current runtime address. + * This is called only by the Boot CPU. + * "relocate" is called with our current runtime virutal + * address. + * r21 will be loaded with the physical runtime address of _stext + */ + bl 0f /* Get our runtime address */ +0: mflr r21 /* Make it accessible */ + addis r21,r21,(_stext - 0b)@ha + addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */ + + /* + * We have the runtime (virutal) address of our base. + * We calculate our shift of offset from a 256M page. + * We could map the 256M page we belong to at PAGE_OFFSET and + * get going from there. + */ + lis r4,KERNELBASE@h + ori r4,r4,KERNELBASE@l + rlwinm r6,r21,0,4,31 /* r6 = PHYS_START % 256M */ + rlwinm r5,r4,0,4,31 /* r5 = KERNELBASE % 256M */ + subf r3,r5,r6 /* r3 = r6 - r5 */ + add r3,r4,r3 /* Required Virutal Address */ + + bl relocate +#endif + bl init_cpu_state /* @@ -86,7 +115,64 @@ _ENTRY(_start); bl early_init -#ifdef CONFIG_DYNAMIC_MEMSTART +#ifdef CONFIG_RELOCATABLE + /* + * Relocatable kernel support based on processing of dynamic + * relocation entries. + * + * r25 will contain RPN/ERPN for the start address of memory + * r21 will contain the current offset of _stext + */ + lis r3,kernstart_addr@ha + la r3,kernstart_addr@l(r3) + + /* + * Compute the kernstart_addr. + * kernstart_addr => (r6,r8) + * kernstart_addr & ~0xfffffff => (r6,r7) + */ + rlwinm r6,r25,0,28,31 /* ERPN.
Bits 32-35 of Address */ + rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */ + rlwinm r8,r21,0,4,31 /* r8 = (_stext & 0xfffffff) */ + or r8,r7,r8 /* Compute the lower 32bit of kernstart_addr */ + + /* Store kernstart_addr */ + stw r6,0(r3) /* higher 32bit */ + stw r8,4(r3) /* lower 32bit */ + + /* + * Compute the virt_phys_offset : + * virt_phys_offset = stext.run - kernstart_addr + * + * stext.run = (KERNELBASE & ~0xfffffff) + (kernstart_addr & 0xfffffff) + * When we relocate, we have : + * + * (kernstart_addr & 0xfffffff) = (stext.run & 0xfffffff) + * + * hence: + * virt_phys_offset = (KERNELBASE & ~0xfffffff) - (kernstart_addr & ~0xfffffff) + * + */ + + /* KERNELBASE&~0xfffffff => (r4,r5) */ + li r4, 0 /* higer 32bit */ + lis r5,KERNELBASE@h + rlwinm r5,r5,0,0,3 /* Align to 256M, lower 32bit */ + + /* + * 64bit subtraction. + */ + subfc r5,r7,r5 + subfe r4,r6,r4 + + /* Store virt_phys_offset */ + lis r3,virt_phys_offset@ha + la r3,virt_phys_offset@l(r3) + + stw r4,0(r3) + stw r5,4(r3) + +#elif defined(CONFIG_DYNAMIC_MEMSTART) /* * Mapping based, page aligned dynamic kernel loading. * @@ -804,7 +890,12 @@ skpinv: addi r4,r4,1 /* Increment */ /* * Configure and load pinned entry into TLB slot 63. */ -#ifdef CONFIG_DYNAMIC_MEMSTART +#ifdef CONFIG_NONSTATIC_KERNEL + /* + * In case of a NONSTATIC_KERNEL we reuse the TLB XLAT + * entries of the initial mapping set by the boot loader. + * The XLAT entry is stored in r25 + */ /* Read the XLAT entry for our current mapping */ tlbre r25,r23,PPC44x_TLB_XLAT -- cgit v1.2.2 From 90363ddf0a1a4dccfbb8d0c10b8f488bc7fa69f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 18 Dec 2011 00:34:42 +0100 Subject: PM: Drop generic_subsys_pm_ops Since the PM core is now going to execute driver callbacks directly if the corresponding subsystem callbacks are not present, forward-only subsystem callbacks (i.e. callbacks that only execute the corresponding driver callbacks) are not necessary any more. Thus it is possible to remove generic_subsys_pm_ops, because the only callback in there that is not forward-only, .runtime_idle, is not really used by the only user of generic_subsys_pm_ops, which is vio_bus_type. However, the generic callback routines themselves cannot be removed from generic_ops.c, because they are used individually by a number of subsystems. Signed-off-by: Rafael J. Wysocki --- arch/powerpc/kernel/vio.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index f65af61996bd..8b086299ba25 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1406,7 +1406,6 @@ static struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, - .pm = GENERIC_SUBSYS_PM_OPS, }; /** -- cgit v1.2.2 From 8a25a2fd126c621f44f3aeaef80d51f00fc11639 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 21 Dec 2011 14:29:42 -0800 Subject: cpu: convert 'cpu' and 'machinecheck' sysdev_class to a regular subsystem This moves the 'cpu sysdev_class' over to a regular 'cpu' subsystem and converts the devices to regular devices. The sysdev drivers are implemented as subsystem interfaces now. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. Userspace relies on events and generic sysfs subsystem infrastructure from sysdev devices, which are made available with this conversion.
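The shape of that conversion, reduced to a single made-up attribute "foo" (a sketch, not part of the patch; error handling elided): a sysdev attribute and its struct sys_device callback become a regular device attribute whose callback takes struct device.

	/*
	 * Old style, removed by this series:
	 *
	 *	static ssize_t show_foo(struct sys_device *dev,
	 *				struct sysdev_attribute *attr, char *buf);
	 *	static SYSDEV_ATTR(foo, 0644, show_foo, NULL);
	 *
	 * New style, as used throughout the diff below:
	 */
	static ssize_t show_foo(struct device *dev,
				struct device_attribute *attr, char *buf)
	{
		struct cpu *cpu = container_of(dev, struct cpu, dev);

		/* the cpu's id now lives in the embedded struct device */
		return sprintf(buf, "%d\n", cpu->dev.id);
	}
	static DEVICE_ATTR(foo, 0644, show_foo, NULL);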
Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Tony Luck Cc: Fenghua Yu Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: "David S. Miller" Cc: Chris Metcalf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: Tigran Aivazian Cc: Len Brown Cc: Zhang Rui Cc: Dave Jones Cc: Peter Zijlstra Cc: Russell King Cc: Andrew Morton Cc: Arjan van de Ven Cc: "Rafael J. Wysocki" Cc: "Srivatsa S. Bhat" Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/cacheinfo.c | 10 +- arch/powerpc/kernel/smp.c | 2 +- arch/powerpc/kernel/sysfs.c | 257 ++++++++++++++++++++-------------------- 3 files changed, 134 insertions(+), 135 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index a3c684b4c862..92c6b008dd2b 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -451,15 +451,15 @@ out: static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id) { struct cache_dir *cache_dir; - struct sys_device *sysdev; + struct device *dev; struct kobject *kobj = NULL; - sysdev = get_cpu_sysdev(cpu_id); - WARN_ONCE(!sysdev, "no sysdev for CPU %i\n", cpu_id); - if (!sysdev) + dev = get_cpu_device(cpu_id); + WARN_ONCE(!dev, "no dev for CPU %i\n", cpu_id); + if (!dev) goto err; - kobj = kobject_create_and_add("cache", &sysdev->kobj); + kobj = kobject_create_and_add("cache", &dev->kobj); if (!kobj) goto err; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 25ddbfc7dd36..da08240353fa 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ce035c1905f0..f396ef27916b 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -37,12 +37,12 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); /* Time in microseconds we delay before sleeping in the idle loop */ DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; -static ssize_t store_smt_snooze_delay(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_smt_snooze_delay(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); ssize_t ret; long snooze; @@ -50,21 +50,21 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, if (ret != 1) return -EINVAL; - per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; + per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; return count; } -static ssize_t show_smt_snooze_delay(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t show_smt_snooze_delay(struct device *dev, + struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); + return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); } -static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, +static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, store_smt_snooze_delay); static int __init setup_smt_snooze_delay(char *str) @@ -117,25 +117,25 @@ static 
void write_##NAME(void *val) \ ppc_enable_pmcs(); \ mtspr(ADDRESS, *(unsigned long *)val); \ } \ -static ssize_t show_##NAME(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ +static ssize_t show_##NAME(struct device *dev, \ + struct device_attribute *attr, \ char *buf) \ { \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ unsigned long val; \ - smp_call_function_single(cpu->sysdev.id, read_##NAME, &val, 1); \ + smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \ return sprintf(buf, "%lx\n", val); \ } \ static ssize_t __used \ - store_##NAME(struct sys_device *dev, struct sysdev_attribute *attr, \ + store_##NAME(struct device *dev, struct device_attribute *attr, \ const char *buf, size_t count) \ { \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ unsigned long val; \ int ret = sscanf(buf, "%lx", &val); \ if (ret != 1) \ return -EINVAL; \ - smp_call_function_single(cpu->sysdev.id, write_##NAME, &val, 1); \ + smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \ return count; \ } @@ -178,22 +178,22 @@ SYSFS_PMCSETUP(purr, SPRN_PURR); SYSFS_PMCSETUP(spurr, SPRN_SPURR); SYSFS_PMCSETUP(dscr, SPRN_DSCR); -static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); -static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL); -static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr); -static SYSDEV_ATTR(purr, 0600, show_purr, store_purr); +static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +static DEVICE_ATTR(spurr, 0600, show_spurr, NULL); +static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); +static DEVICE_ATTR(purr, 0600, show_purr, store_purr); unsigned long dscr_default = 0; EXPORT_SYMBOL(dscr_default); -static ssize_t show_dscr_default(struct sysdev_class *class, - struct sysdev_class_attribute *attr, char *buf) +static ssize_t show_dscr_default(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%lx\n", dscr_default); } -static ssize_t __used store_dscr_default(struct sysdev_class *class, - struct sysdev_class_attribute *attr, const char *buf, +static ssize_t __used store_dscr_default(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -207,15 +207,14 @@ static ssize_t __used store_dscr_default(struct sysdev_class *class, return count; } -static SYSDEV_CLASS_ATTR(dscr_default, 0600, +static DEVICE_ATTR(dscr_default, 0600, show_dscr_default, store_dscr_default); static void sysfs_create_dscr_default(void) { int err = 0; if (cpu_has_feature(CPU_FTR_DSCR)) - err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, - &attr_dscr_default.attr); + err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); } #endif /* CONFIG_PPC64 */ @@ -259,72 +258,72 @@ SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3); #endif /* HAS_PPC_PMC_PA6T */ #ifdef HAS_PPC_PMC_IBM -static struct sysdev_attribute ibm_common_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), +static struct device_attribute ibm_common_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), }; #endif /* HAS_PPC_PMC_G4 */ #ifdef HAS_PPC_PMC_G4 -static struct sysdev_attribute g4_common_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), - _SYSDEV_ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2), +static struct 
device_attribute g4_common_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), + __ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2), }; #endif /* HAS_PPC_PMC_G4 */ -static struct sysdev_attribute classic_pmc_attrs[] = { - _SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1), - _SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2), - _SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3), - _SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4), - _SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5), - _SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6), +static struct device_attribute classic_pmc_attrs[] = { + __ATTR(pmc1, 0600, show_pmc1, store_pmc1), + __ATTR(pmc2, 0600, show_pmc2, store_pmc2), + __ATTR(pmc3, 0600, show_pmc3, store_pmc3), + __ATTR(pmc4, 0600, show_pmc4, store_pmc4), + __ATTR(pmc5, 0600, show_pmc5, store_pmc5), + __ATTR(pmc6, 0600, show_pmc6, store_pmc6), #ifdef CONFIG_PPC64 - _SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7), - _SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8), + __ATTR(pmc7, 0600, show_pmc7, store_pmc7), + __ATTR(pmc8, 0600, show_pmc8, store_pmc8), #endif }; #ifdef HAS_PPC_PMC_PA6T -static struct sysdev_attribute pa6t_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), - _SYSDEV_ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0), - _SYSDEV_ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1), - _SYSDEV_ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2), - _SYSDEV_ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), - _SYSDEV_ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), - _SYSDEV_ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), +static struct device_attribute pa6t_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), + __ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0), + __ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1), + __ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2), + __ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), + __ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), + __ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), #ifdef CONFIG_DEBUG_KERNEL - _SYSDEV_ATTR(hid0, 0600, show_hid0, store_hid0), - _SYSDEV_ATTR(hid1, 0600, show_hid1, store_hid1), - _SYSDEV_ATTR(hid4, 0600, show_hid4, store_hid4), - _SYSDEV_ATTR(hid5, 0600, show_hid5, store_hid5), - _SYSDEV_ATTR(ima0, 0600, show_ima0, store_ima0), - _SYSDEV_ATTR(ima1, 0600, show_ima1, store_ima1), - _SYSDEV_ATTR(ima2, 0600, show_ima2, store_ima2), - _SYSDEV_ATTR(ima3, 0600, show_ima3, store_ima3), - _SYSDEV_ATTR(ima4, 0600, show_ima4, store_ima4), - _SYSDEV_ATTR(ima5, 0600, show_ima5, store_ima5), - _SYSDEV_ATTR(ima6, 0600, show_ima6, store_ima6), - _SYSDEV_ATTR(ima7, 0600, show_ima7, store_ima7), - _SYSDEV_ATTR(ima8, 0600, show_ima8, store_ima8), - _SYSDEV_ATTR(ima9, 0600, show_ima9, store_ima9), - _SYSDEV_ATTR(imaat, 0600, show_imaat, store_imaat), - _SYSDEV_ATTR(btcr, 0600, show_btcr, store_btcr), - _SYSDEV_ATTR(pccr, 0600, show_pccr, store_pccr), - _SYSDEV_ATTR(rpccr, 0600, show_rpccr, store_rpccr), - _SYSDEV_ATTR(der, 0600, show_der, store_der), - _SYSDEV_ATTR(mer, 0600, show_mer, store_mer), - _SYSDEV_ATTR(ber, 0600, show_ber, store_ber), - _SYSDEV_ATTR(ier, 0600, show_ier, store_ier), - _SYSDEV_ATTR(sier, 0600, show_sier, store_sier), - _SYSDEV_ATTR(siar, 0600, show_siar, store_siar), - _SYSDEV_ATTR(tsr0, 0600, show_tsr0, store_tsr0), - _SYSDEV_ATTR(tsr1, 0600, show_tsr1, store_tsr1), - _SYSDEV_ATTR(tsr2, 0600, show_tsr2, store_tsr2), - 
_SYSDEV_ATTR(tsr3, 0600, show_tsr3, store_tsr3), + __ATTR(hid0, 0600, show_hid0, store_hid0), + __ATTR(hid1, 0600, show_hid1, store_hid1), + __ATTR(hid4, 0600, show_hid4, store_hid4), + __ATTR(hid5, 0600, show_hid5, store_hid5), + __ATTR(ima0, 0600, show_ima0, store_ima0), + __ATTR(ima1, 0600, show_ima1, store_ima1), + __ATTR(ima2, 0600, show_ima2, store_ima2), + __ATTR(ima3, 0600, show_ima3, store_ima3), + __ATTR(ima4, 0600, show_ima4, store_ima4), + __ATTR(ima5, 0600, show_ima5, store_ima5), + __ATTR(ima6, 0600, show_ima6, store_ima6), + __ATTR(ima7, 0600, show_ima7, store_ima7), + __ATTR(ima8, 0600, show_ima8, store_ima8), + __ATTR(ima9, 0600, show_ima9, store_ima9), + __ATTR(imaat, 0600, show_imaat, store_imaat), + __ATTR(btcr, 0600, show_btcr, store_btcr), + __ATTR(pccr, 0600, show_pccr, store_pccr), + __ATTR(rpccr, 0600, show_rpccr, store_rpccr), + __ATTR(der, 0600, show_der, store_der), + __ATTR(mer, 0600, show_mer, store_mer), + __ATTR(ber, 0600, show_ber, store_ber), + __ATTR(ier, 0600, show_ier, store_ier), + __ATTR(sier, 0600, show_sier, store_sier), + __ATTR(siar, 0600, show_siar, store_siar), + __ATTR(tsr0, 0600, show_tsr0, store_tsr0), + __ATTR(tsr1, 0600, show_tsr1, store_tsr1), + __ATTR(tsr2, 0600, show_tsr2, store_tsr2), + __ATTR(tsr3, 0600, show_tsr3, store_tsr3), #endif /* CONFIG_DEBUG_KERNEL */ }; #endif /* HAS_PPC_PMC_PA6T */ @@ -333,14 +332,14 @@ static struct sysdev_attribute pa6t_attrs[] = { static void __cpuinit register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - struct sysdev_attribute *attrs, *pmc_attrs; + struct device *s = &c->dev; + struct device_attribute *attrs, *pmc_attrs; int i, nattrs; #ifdef CONFIG_PPC64 if (!firmware_has_feature(FW_FEATURE_ISERIES) && cpu_has_feature(CPU_FTR_SMT)) - sysdev_create_file(s, &attr_smt_snooze_delay); + device_create_file(s, &dev_attr_smt_snooze_delay); #endif /* PMC stuff */ @@ -348,14 +347,14 @@ static void __cpuinit register_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -363,7 +362,7 @@ static void __cpuinit register_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; #endif /* HAS_PPC_PMC_PA6T */ @@ -374,24 +373,24 @@ static void __cpuinit register_cpu_online(unsigned int cpu) } for (i = 0; i < nattrs; i++) - sysdev_create_file(s, &attrs[i]); + device_create_file(s, &attrs[i]); if (pmc_attrs) for (i = 0; i < cur_cpu_spec->num_pmcs; i++) - sysdev_create_file(s, &pmc_attrs[i]); + device_create_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_create_file(s, &attr_mmcra); + device_create_file(s, &dev_attr_mmcra); if (cpu_has_feature(CPU_FTR_PURR)) - sysdev_create_file(s, &attr_purr); + device_create_file(s, &dev_attr_purr); if (cpu_has_feature(CPU_FTR_SPURR)) - sysdev_create_file(s, 
&attr_spurr); + device_create_file(s, &dev_attr_spurr); if (cpu_has_feature(CPU_FTR_DSCR)) - sysdev_create_file(s, &attr_dscr); + device_create_file(s, &dev_attr_dscr); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_online(cpu); @@ -401,8 +400,8 @@ static void __cpuinit register_cpu_online(unsigned int cpu) static void unregister_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - struct sysdev_attribute *attrs, *pmc_attrs; + struct device *s = &c->dev; + struct device_attribute *attrs, *pmc_attrs; int i, nattrs; BUG_ON(!c->hotpluggable); @@ -410,7 +409,7 @@ static void unregister_cpu_online(unsigned int cpu) #ifdef CONFIG_PPC64 if (!firmware_has_feature(FW_FEATURE_ISERIES) && cpu_has_feature(CPU_FTR_SMT)) - sysdev_remove_file(s, &attr_smt_snooze_delay); + device_remove_file(s, &dev_attr_smt_snooze_delay); #endif /* PMC stuff */ @@ -418,14 +417,14 @@ static void unregister_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -433,7 +432,7 @@ static void unregister_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; #endif /* HAS_PPC_PMC_PA6T */ @@ -444,24 +443,24 @@ static void unregister_cpu_online(unsigned int cpu) } for (i = 0; i < nattrs; i++) - sysdev_remove_file(s, &attrs[i]); + device_remove_file(s, &attrs[i]); if (pmc_attrs) for (i = 0; i < cur_cpu_spec->num_pmcs; i++) - sysdev_remove_file(s, &pmc_attrs[i]); + device_remove_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_remove_file(s, &attr_mmcra); + device_remove_file(s, &dev_attr_mmcra); if (cpu_has_feature(CPU_FTR_PURR)) - sysdev_remove_file(s, &attr_purr); + device_remove_file(s, &dev_attr_purr); if (cpu_has_feature(CPU_FTR_SPURR)) - sysdev_remove_file(s, &attr_spurr); + device_remove_file(s, &dev_attr_spurr); if (cpu_has_feature(CPU_FTR_DSCR)) - sysdev_remove_file(s, &attr_dscr); + device_remove_file(s, &dev_attr_dscr); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_offline(cpu); @@ -513,70 +512,70 @@ static struct notifier_block __cpuinitdata sysfs_cpu_nb = { static DEFINE_MUTEX(cpu_mutex); -int cpu_add_sysdev_attr(struct sysdev_attribute *attr) +int cpu_add_dev_attr(struct device_attribute *attr) { int cpu; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev_create_file(get_cpu_sysdev(cpu), attr); + device_create_file(get_cpu_device(cpu), attr); } mutex_unlock(&cpu_mutex); return 0; } -EXPORT_SYMBOL_GPL(cpu_add_sysdev_attr); +EXPORT_SYMBOL_GPL(cpu_add_dev_attr); -int cpu_add_sysdev_attr_group(struct attribute_group *attrs) +int cpu_add_dev_attr_group(struct attribute_group *attrs) { int cpu; - struct sys_device *sysdev; + struct device *dev; int ret; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - ret = sysfs_create_group(&sysdev->kobj, attrs); + dev = 
get_cpu_device(cpu); + ret = sysfs_create_group(&dev->kobj, attrs); WARN_ON(ret != 0); } mutex_unlock(&cpu_mutex); return 0; } -EXPORT_SYMBOL_GPL(cpu_add_sysdev_attr_group); +EXPORT_SYMBOL_GPL(cpu_add_dev_attr_group); -void cpu_remove_sysdev_attr(struct sysdev_attribute *attr) +void cpu_remove_dev_attr(struct device_attribute *attr) { int cpu; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev_remove_file(get_cpu_sysdev(cpu), attr); + device_remove_file(get_cpu_device(cpu), attr); } mutex_unlock(&cpu_mutex); } -EXPORT_SYMBOL_GPL(cpu_remove_sysdev_attr); +EXPORT_SYMBOL_GPL(cpu_remove_dev_attr); -void cpu_remove_sysdev_attr_group(struct attribute_group *attrs) +void cpu_remove_dev_attr_group(struct attribute_group *attrs) { int cpu; - struct sys_device *sysdev; + struct device *dev; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - sysfs_remove_group(&sysdev->kobj, attrs); + dev = get_cpu_device(cpu); + sysfs_remove_group(&dev->kobj, attrs); } mutex_unlock(&cpu_mutex); } -EXPORT_SYMBOL_GPL(cpu_remove_sysdev_attr_group); +EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group); /* NUMA stuff */ @@ -590,7 +589,7 @@ static void register_nodes(void) register_one_node(i); } -int sysfs_add_device_to_node(struct sys_device *dev, int nid) +int sysfs_add_device_to_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; return sysfs_create_link(&node->sysdev.kobj, &dev->kobj, @@ -598,7 +597,7 @@ int sysfs_add_device_to_node(struct sys_device *dev, int nid) } EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); -void sysfs_remove_device_from_node(struct sys_device *dev, int nid) +void sysfs_remove_device_from_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; sysfs_remove_link(&node->sysdev.kobj, kobject_name(&dev->kobj)); @@ -614,14 +613,14 @@ static void register_nodes(void) #endif /* Only valid if CPU is present. */ -static ssize_t show_physical_id(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_physical_id(struct device *dev, + struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); - return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id)); + return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->dev.id)); } -static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL); +static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL); static int __init topology_init(void) { @@ -646,7 +645,7 @@ static int __init topology_init(void) if (cpu_online(cpu) || c->hotpluggable) { register_cpu(c, cpu); - sysdev_create_file(&c->sysdev, &attr_physical_id); + device_create_file(&c->dev, &dev_attr_physical_id); } if (cpu_online(cpu)) -- cgit v1.2.2 From 10fbcf4c6cb122005cdf36fc24d7683da92c7a27 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 21 Dec 2011 14:48:43 -0800 Subject: convert 'memory' sysdev_class to a regular subsystem This moves the 'memory sysdev_class' over to a regular 'memory' subsystem and converts the devices to regular devices. The sysdev drivers are implemented as subsystem interfaces now. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. 
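The NUMA node links in the diff that follows use the same substitution; reduced to one function, the change amounts to reaching the kobject through the node's embedded struct device (a sketch restating the hunk below, not new code):

	int sysfs_add_device_to_node(struct device *dev, int nid)
	{
		struct node *node = &node_devices[nid];

		/* was &node->sysdev.kobj before the conversion */
		return sysfs_create_link(&node->dev.kobj, &dev->kobj,
					 kobject_name(&dev->kobj));
	}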
Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index f396ef27916b..5e7c1655f13a 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -592,7 +592,7 @@ static void register_nodes(void) int sysfs_add_device_to_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; - return sysfs_create_link(&node->sysdev.kobj, &dev->kobj, + return sysfs_create_link(&node->dev.kobj, &dev->kobj, kobject_name(&dev->kobj)); } EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); @@ -600,7 +600,7 @@ EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); void sysfs_remove_device_from_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; - sysfs_remove_link(&node->sysdev.kobj, kobject_name(&dev->kobj)); + sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj)); } EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node); -- cgit v1.2.2 From d161a13f974c72fd7ff0069d39a3ae57cb5694ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jul 2011 03:36:29 -0400 Subject: switch procfs to umode_t use: both proc_dir_entry ->mode and populating functions Signed-off-by: Al Viro --- arch/powerpc/kernel/lparcfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 84daabe2fcba..578f35f18723 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -783,7 +783,7 @@ static const struct file_operations lparcfg_fops = { static int __init lparcfg_init(void) { struct proc_dir_entry *ent; - mode_t mode = S_IRUSR | S_IRGRP | S_IROTH; + umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; /* Allow writing if we have FW_FEATURE_SPLPAR */ if (firmware_has_feature(FW_FEATURE_SPLPAR) && -- cgit v1.2.2
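For context on the umode_t switch in the last hunk, a sketch of the resulting lparcfg_init() flow (the feature check is abbreviated; the real function tests more conditions than shown here):

	umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;	/* 0444 */
	struct proc_dir_entry *ent;

	/* allow writes when the platform supports shared-processor LPAR */
	if (firmware_has_feature(FW_FEATURE_SPLPAR))
		mode |= S_IWUSR;

	ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops);
	if (!ent)
		printk(KERN_ERR "Failed to create powerpc/lparcfg\n");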