From 578b7cd1518f8d1b17a7fb1671d3d756c9cb49f1 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 7 Apr 2010 14:44:28 +1000
Subject: powerpc/vio: Add modalias support

BenH: Added to vio_cmo_dev_attrs as well

Provide a modalias entry for VIO devices in sysfs.  I believe
this was another initrd generation bugfix for anaconda.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/vio.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 82237176a2a3..2f57956714bd 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -958,9 +958,12 @@ viodev_cmo_rd_attr(allocated);
 
 static ssize_t name_show(struct device *, struct device_attribute *, char *);
 static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf);
 static struct device_attribute vio_cmo_dev_attrs[] = {
 	__ATTR_RO(name),
 	__ATTR_RO(devspec),
+	__ATTR_RO(modalias),
 	__ATTR(cmo_desired,       S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
 	       viodev_cmo_desired_show, viodev_cmo_desired_set),
 	__ATTR(cmo_entitled,      S_IRUGO, viodev_cmo_entitled_show,      NULL),
@@ -1320,9 +1323,27 @@ static ssize_t devspec_show(struct device *dev,
 	return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none");
 }
 
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	const struct vio_dev *vio_dev = to_vio_dev(dev);
+	struct device_node *dn;
+	const char *cp;
+
+	dn = dev->archdata.of_node;
+	if (!dn)
+		return -ENODEV;
+	cp = of_get_property(dn, "compatible", NULL);
+	if (!cp)
+		return -ENODEV;
+
+	return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
+}
+
 static struct device_attribute vio_dev_attrs[] = {
 	__ATTR_RO(name),
 	__ATTR_RO(devspec),
+	__ATTR_RO(modalias),
 	__ATTR_NULL
 };
 
-- 
cgit v1.2.2


From 9c7cc234dc5edf5379fbbab4973f6704f59bc57b Mon Sep 17 00:00:00 2001
From: "K.Prasad" <prasad@linux.vnet.ibm.com>
Date: Mon, 29 Mar 2010 23:59:25 +0000
Subject: powerpc: Disable interrupts for data breakpoint exceptions

Data address breakpoint exceptions are currently handled along with page-faults
which require interrupts to remain in enabled state. Since exception handling
for data breakpoints aren't pre-empt safe, we handle them separately.

Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/exceptions-64s.S | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e3be98ffe2a7..3e423fbad6bc 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -735,8 +735,11 @@ _STATIC(do_hash_page)
 	std	r3,_DAR(r1)
 	std	r4,_DSISR(r1)
 
-	andis.	r0,r4,0xa450		/* weird error? */
+	andis.	r0,r4,0xa410		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
+	andis.  r0,r4,DSISR_DABRMATCH@h
+	bne-    handle_dabr_fault
+
 BEGIN_FTR_SECTION
 	andis.	r0,r4,0x0020		/* Is it a segment table fault? */
 	bne-	do_ste_alloc		/* If so handle it */
@@ -823,6 +826,14 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
 	bl	.raw_local_irq_restore
 	b	11f
 
+/* We have a data breakpoint exception - handle it */
+handle_dabr_fault:
+	ld      r4,_DAR(r1)
+	ld      r5,_DSISR(r1)
+	addi    r3,r1,STACK_FRAME_OVERHEAD
+	bl      .do_dabr
+	b       .ret_from_except_lite
+
 /* Here we have a page fault that hash_page can't handle. */
 handle_page_fault:
 	ENABLE_INTS
-- 
cgit v1.2.2


From 6fe9d1facb5346a615f9b571df3b91593afb29c3 Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Wed, 31 Mar 2010 21:39:24 +0000
Subject: powerpc/pseries: Export data from new hcall H_EM_GET_PARMS

Add support for H_EM_GET_PARMS hcall that will return data
related to power modes from the platform.  Export the data
directly to user space for administrative tools to interpret
and use.

cat /proc/powerpc/lparcfg will export power mode data

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/lparcfg.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index c2c70e1b32cd..50362b6ef6e9 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -38,7 +38,7 @@
 #include <asm/vio.h>
 #include <asm/mmu.h>
 
-#define MODULE_VERS "1.8"
+#define MODULE_VERS "1.9"
 #define MODULE_NAME "lparcfg"
 
 /* #define LPARCFG_DEBUG */
@@ -487,6 +487,14 @@ static void splpar_dispatch_data(struct seq_file *m)
 	seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions);
 }
 
+static void parse_em_data(struct seq_file *m)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS)
+		seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]);
+}
+
 static int pseries_lparcfg_data(struct seq_file *m, void *v)
 {
 	int partition_potential_processors;
@@ -541,6 +549,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 
 	seq_printf(m, "slb_size=%d\n", mmu_slb_size);
 
+	parse_em_data(m);
+
 	return 0;
 }
 
-- 
cgit v1.2.2


From fe1691e3f49d41452832f5aee2b952bd201ccab1 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Date: Tue, 2 Mar 2010 05:37:09 +0000
Subject: powerpc/8xx: Optimze TLB Miss handlers

This removes a couple of insn's from the TLB Miss
handlers whithout changing functionality.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_8xx.S | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 3ef743fa5d7c..ecc4a02277e3 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -343,17 +343,14 @@ InstructionTLBMiss:
 	cmpwi	cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT
 	bne-	cr0, 2f
 
-	/* Clear PP lsb, 0x400 */
-	rlwinm 	r10, r10, 0, 22, 20
-
 	/* The Linux PTE won't go exactly into the MMU TLB.
-	 * Software indicator bits 22 and 28 must be clear.
+	 * Software indicator bits 21 and 28 must be clear.
 	 * Software indicator bits 24, 25, 26, and 27 must be
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
 	li	r11, 0x00f0
-	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
+	rlwimi	r10, r11, 0, 0x07f8	/* Set 24-27, clear 21-23,28 */
 	DO_8xx_CPU6(0x2d80, r3)
 	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
 
@@ -444,9 +441,7 @@ DataStoreTLBMiss:
 
 	/* Honour kernel RO, User NA */
 	/* 0x200 == Extended encoding, bit 22 */
-	/* r11 =  (r10 & _PAGE_USER) >> 2 */
-	rlwinm	r11, r10, 32-2, 0x200
-	or	r10, r11, r10
+	rlwimi	r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */
 	/* r11 =  (r10 & _PAGE_RW) >> 1 */
 	rlwinm	r11, r10, 32-1, 0x200
 	or	r10, r11, r10
-- 
cgit v1.2.2


From 4afb0be7ccda0ca551cc37572bab74ba4a3c18dd Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Date: Tue, 2 Mar 2010 05:37:10 +0000
Subject: powerpc/8xx: Avoid testing for kernel space in ITLB Miss.

Only modules will cause ITLB Misses as we always pin
the first 8MB of kernel memory.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_8xx.S | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index ecc4a02277e3..84ca1d9b9ed3 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -318,12 +318,16 @@ InstructionTLBMiss:
 	/* If we are faulting a kernel address, we have to use the
 	 * kernel page tables.
 	 */
+#ifdef CONFIG_MODULES
+	/* Only modules will cause ITLB Misses as we always
+	 * pin the first 8MB of kernel memory */
 	andi.	r11, r10, 0x0800	/* Address >= 0x80000000 */
 	beq	3f
 	lis	r11, swapper_pg_dir@h
 	ori	r11, r11, swapper_pg_dir@l
 	rlwimi	r10, r11, 0, 2, 19
 3:
+#endif
 	lwz	r11, 0(r10)	/* Get the level 1 entry */
 	rlwinm.	r10, r11,0,0,19	/* Extract page descriptor page address */
 	beq	2f		/* If zero, don't try to find a pte */
-- 
cgit v1.2.2


From d069cb4373fe0d451357c4d3769623a7564dfa9f Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Date: Tue, 2 Mar 2010 05:37:11 +0000
Subject: powerpc/8xx: Don't touch ACCESSED when no SWAP.

Only the swap function cares about the ACCESSED bit in
the pte. Do not waste cycles updateting ACCESSED when swap
is not compiled into the kernel.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_8xx.S | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 84ca1d9b9ed3..6478a9632552 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -343,10 +343,11 @@ InstructionTLBMiss:
 	mfspr	r11, SPRN_MD_TWC	/* ....and get the pte address */
 	lwz	r10, 0(r11)	/* Get the pte */
 
+#ifdef CONFIG_SWAP
 	andi.	r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT
 	cmpwi	cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT
 	bne-	cr0, 2f
-
+#endif
 	/* The Linux PTE won't go exactly into the MMU TLB.
 	 * Software indicator bits 21 and 28 must be clear.
 	 * Software indicator bits 24, 25, 26, and 27 must be
@@ -439,10 +440,11 @@ DataStoreTLBMiss:
 	 * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
 	 * r10 = (r10 & ~PRESENT) | r11;
 	 */
+#ifdef CONFIG_SWAP
 	rlwinm	r11, r10, 32-5, _PAGE_PRESENT
 	and	r11, r11, r10
 	rlwimi	r10, r11, 0, _PAGE_PRESENT
-
+#endif
 	/* Honour kernel RO, User NA */
 	/* 0x200 == Extended encoding, bit 22 */
 	rlwimi	r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */
-- 
cgit v1.2.2


From 469d62be9263b92f2c3329540cbb1c076111f4f3 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Date: Tue, 2 Mar 2010 05:37:12 +0000
Subject: powerpc/8xx: Use SPRG2 and DAR registers to stash r11 and cr.

This avoids storing these registers in memory.
CPU6 errata will still use the old way.
Remove some G2 leftover accesses from 2.4

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_8xx.S | 49 +++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 13 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 6478a9632552..1f1a04b5c2a4 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -71,9 +71,6 @@ _ENTRY(_start);
  * in the first level table, but that would require many changes to the
  * Linux page directory/table functions that I don't want to do right now.
  *
- * I used to use SPRG2 for a temporary register in the TLB handler, but it
- * has since been put to other uses.  I now use a hack to save a register
- * and the CCR at memory location 0.....Someday I'll fix this.....
  *	-- Dan
  */
 	.globl	__start
@@ -302,8 +299,13 @@ InstructionTLBMiss:
 	DO_8xx_CPU6(0x3f80, r3)
 	mtspr	SPRN_M_TW, r10	/* Save a couple of working registers */
 	mfcr	r10
+#ifdef CONFIG_8xx_CPU6
 	stw	r10, 0(r0)
 	stw	r11, 4(r0)
+#else
+	mtspr	SPRN_DAR, r10
+	mtspr	SPRN_SPRG2, r11
+#endif
 	mfspr	r10, SPRN_SRR0	/* Get effective address of fault */
 #ifdef CONFIG_8xx_CPU15
 	addi	r11, r10, 0x1000
@@ -359,13 +361,19 @@ InstructionTLBMiss:
 	DO_8xx_CPU6(0x2d80, r3)
 	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
 
-	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	/* Restore registers */
+#ifndef CONFIG_8xx_CPU6
+	mfspr	r10, SPRN_DAR
+	mtcr	r10
+	mtspr	SPRN_DAR, r11	/* Tag DAR */
+	mfspr	r11, SPRN_SPRG2
+#else
 	lwz	r11, 0(r0)
 	mtcr	r11
 	lwz	r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
 	lwz	r3, 8(r0)
 #endif
+	mfspr	r10, SPRN_M_TW
 	rfi
 2:
 	mfspr	r11, SPRN_SRR1
@@ -375,13 +383,20 @@ InstructionTLBMiss:
 	rlwinm	r11, r11, 0, 0xffff
 	mtspr	SPRN_SRR1, r11
 
-	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	/* Restore registers */
+#ifndef CONFIG_8xx_CPU6
+	mfspr	r10, SPRN_DAR
+	mtcr	r10
+	li	r11, 0x00f0
+	mtspr	SPRN_DAR, r11	/* Tag DAR */
+	mfspr	r11, SPRN_SPRG2
+#else
 	lwz	r11, 0(r0)
 	mtcr	r11
 	lwz	r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
 	lwz	r3, 8(r0)
 #endif
+	mfspr	r10, SPRN_M_TW
 	b	InstructionAccess
 
 	. = 0x1200
@@ -392,8 +407,13 @@ DataStoreTLBMiss:
 	DO_8xx_CPU6(0x3f80, r3)
 	mtspr	SPRN_M_TW, r10	/* Save a couple of working registers */
 	mfcr	r10
+#ifdef CONFIG_8xx_CPU6
 	stw	r10, 0(r0)
 	stw	r11, 4(r0)
+#else
+	mtspr	SPRN_DAR, r10
+	mtspr	SPRN_SPRG2, r11
+#endif
 	mfspr	r10, SPRN_M_TWB	/* Get level 1 table entry address */
 
 	/* If we are faulting a kernel address, we have to use the
@@ -461,18 +481,24 @@ DataStoreTLBMiss:
 	 * of the MMU.
 	 */
 2:	li	r11, 0x00f0
-	mtspr	SPRN_DAR,r11	/* Tag DAR */
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
 
-	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	/* Restore registers */
+#ifndef CONFIG_8xx_CPU6
+	mfspr	r10, SPRN_DAR
+	mtcr	r10
+	mtspr	SPRN_DAR, r11	/* Tag DAR */
+	mfspr	r11, SPRN_SPRG2
+#else
+	mtspr	SPRN_DAR, r11	/* Tag DAR */
 	lwz	r11, 0(r0)
 	mtcr	r11
 	lwz	r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
 	lwz	r3, 8(r0)
 #endif
+	mfspr	r10, SPRN_M_TW
 	rfi
 
 /* This is an instruction TLB error on the MPC8xx.  This could be due
@@ -684,9 +710,6 @@ start_here:
 	tophys(r4,r2)
 	addi	r4,r4,THREAD	/* init task's THREAD */
 	mtspr	SPRN_SPRG_THREAD,r4
-	li	r3,0
-	/* XXX What is that for ? SPRG2 appears otherwise unused on 8xx */
-	mtspr	SPRN_SPRG2,r3	/* 0 => r1 has kernel sp */
 
 	/* stack */
 	lis	r1,init_thread_union@ha
-- 
cgit v1.2.2


From f6d8c8bb1d360272d795927d39f3d2c5934e77d9 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Mon, 29 Mar 2010 05:33:34 +0000
Subject: powerpc/vio: Add missing unlock in error path

Add an unlock before exiting the function.

A simplified version of the semantic patch that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@r exists@
expression E1;
identifier f;
@@

f (...) { <+...
* spin_lock_irq (E1,...);
... when != E1
* return ...;
...+> }
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/vio.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 2f57956714bd..2a3428bef83a 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -645,8 +645,10 @@ void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
 			found = 1;
 			break;
 		}
-	if (!found)
+	if (!found) {
+		spin_unlock_irqrestore(&vio_cmo.lock, flags);
 		return;
+	}
 
 	/* Increase/decrease in desired device entitlement */
 	if (desired >= viodev->cmo.desired) {
-- 
cgit v1.2.2


From 21dbeb91a24d867af0e98ba155bfa80d2906344f Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Fri, 26 Mar 2010 12:03:29 +0000
Subject: powerpc: Use set_cpus_allowed_ptr

Use set_cpus_allowed_ptr rather than set_cpus_allowed.

The semantic patch that makes this change is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression E1,E2;
@@

- set_cpus_allowed(E1, cpumask_of_cpu(E2))
+ set_cpus_allowed_ptr(E1, cpumask_of(E2))

@@
expression E;
identifier I;
@@

- set_cpus_allowed(E, I)
+ set_cpus_allowed_ptr(E, &I)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/smp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c2ee14498077..e36f94f7411a 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -561,12 +561,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	 * se we pin us down to CPU 0 for a short while
 	 */
 	old_mask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid));
+	set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
 	
 	if (smp_ops && smp_ops->setup_cpu)
 		smp_ops->setup_cpu(boot_cpuid);
 
-	set_cpus_allowed(current, old_mask);
+	set_cpus_allowed_ptr(current, &old_mask);
 
 	snapshot_timebases();
 
-- 
cgit v1.2.2


From e9bbc8cde0e3c33b42ddbe1b02108cb5c97275eb Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 18 Feb 2010 12:11:51 +0000
Subject: powerpc/pseries: Call ibm,os-term if the ibm,extended-os-term is
 present

We have had issues in the past with ibm,os-term initiating shutdown of a
partition. This is confusing to the user, especially if panic_timeout is
non zero.

The temporary fix was to avoid calling ibm,os-term if a panic_timeout was set
and since we set it on every boot we basically never call ibm,os-term.

An extended version of ibm,os-term has since been implemented which gives us
the behaviour we want:

  "When the platform supports extended ibm,os-term behavior, the return to the
  RTAS will always occur unless there is a kernel assisted dump active as
  initiated by an ibm,configure-kernel-dump call."

This patch checks for the ibm,extended-os-term property and calls ibm,os-term
if it exists.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/rtas.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 74367841615a..0e1ec6f746f6 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -691,10 +691,14 @@ void rtas_os_term(char *str)
 {
 	int status;
 
-	if (panic_timeout)
-		return;
-
-	if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term"))
+	/*
+	 * Firmware with the ibm,extended-os-term property is guaranteed
+	 * to always return from an ibm,os-term call. Earlier versions without
+	 * this property may terminate the partition which we want to avoid
+	 * since it interferes with panic_timeout.
+	 */
+	if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") ||
+	    RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term"))
 		return;
 
 	snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str);
@@ -705,8 +709,7 @@ void rtas_os_term(char *str)
 	} while (rtas_busy_delay(status));
 
 	if (status != 0)
-		printk(KERN_EMERG "ibm,os-term call failed %d\n",
-			       status);
+		printk(KERN_EMERG "ibm,os-term call failed %d\n", status);
 }
 
 static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
-- 
cgit v1.2.2


From a32aaf14513da776556ad9995de8d83cd76ae60a Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 7 Apr 2010 18:09:15 +1000
Subject: powerpc/vio: Add power management support

Adds support for suspend/resume for VIO devices. This is needed for
support for HMC initiated hibernation.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/vio.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 2a3428bef83a..b8e311d64ad5 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1381,6 +1381,29 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
 	return 0;
 }
 
+static int vio_pm_suspend(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (pm && pm->suspend)
+		return pm->suspend(dev);
+	return 0;
+}
+
+static int vio_pm_resume(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (pm && pm->resume)
+		return pm->resume(dev);
+	return 0;
+}
+
+const struct dev_pm_ops vio_dev_pm_ops = {
+	.suspend = vio_pm_suspend,
+	.resume = vio_pm_resume,
+};
+
 static struct bus_type vio_bus_type = {
 	.name = "vio",
 	.dev_attrs = vio_dev_attrs,
@@ -1388,6 +1411,7 @@ static struct bus_type vio_bus_type = {
 	.match = vio_bus_match,
 	.probe = vio_bus_probe,
 	.remove = vio_bus_remove,
+	.pm = &vio_dev_pm_ops,
 };
 
 /**
-- 
cgit v1.2.2


From 359e4284a3f37aba7fd06d993863de2509d86f54 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Date: Wed, 7 Apr 2010 18:10:20 +1000
Subject: powerpc: Add kprobe-based event tracer

This patch ports the kprobe-based event tracer to powerpc. This patch
is based on x86 port. This brings powerpc on par with x86.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/ptrace.c | 103 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index ed2cfe17d25e..7a0c0199ea28 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -38,6 +38,109 @@
 #include <asm/pgtable.h>
 #include <asm/system.h>
 
+/*
+ * The parameter save area on the stack is used to store arguments being passed
+ * to callee function and is located at fixed offset from stack pointer.
+ */
+#ifdef CONFIG_PPC32
+#define PARAMETER_SAVE_AREA_OFFSET	24  /* bytes */
+#else /* CONFIG_PPC32 */
+#define PARAMETER_SAVE_AREA_OFFSET	48  /* bytes */
+#endif
+
+struct pt_regs_offset {
+	const char *name;
+	int offset;
+};
+
+#define STR(s)	#s			/* convert to string */
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define GPR_OFFSET_NAME(num)	\
+	{.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+	GPR_OFFSET_NAME(0),
+	GPR_OFFSET_NAME(1),
+	GPR_OFFSET_NAME(2),
+	GPR_OFFSET_NAME(3),
+	GPR_OFFSET_NAME(4),
+	GPR_OFFSET_NAME(5),
+	GPR_OFFSET_NAME(6),
+	GPR_OFFSET_NAME(7),
+	GPR_OFFSET_NAME(8),
+	GPR_OFFSET_NAME(9),
+	GPR_OFFSET_NAME(10),
+	GPR_OFFSET_NAME(11),
+	GPR_OFFSET_NAME(12),
+	GPR_OFFSET_NAME(13),
+	GPR_OFFSET_NAME(14),
+	GPR_OFFSET_NAME(15),
+	GPR_OFFSET_NAME(16),
+	GPR_OFFSET_NAME(17),
+	GPR_OFFSET_NAME(18),
+	GPR_OFFSET_NAME(19),
+	GPR_OFFSET_NAME(20),
+	GPR_OFFSET_NAME(21),
+	GPR_OFFSET_NAME(22),
+	GPR_OFFSET_NAME(23),
+	GPR_OFFSET_NAME(24),
+	GPR_OFFSET_NAME(25),
+	GPR_OFFSET_NAME(26),
+	GPR_OFFSET_NAME(27),
+	GPR_OFFSET_NAME(28),
+	GPR_OFFSET_NAME(29),
+	GPR_OFFSET_NAME(30),
+	GPR_OFFSET_NAME(31),
+	REG_OFFSET_NAME(nip),
+	REG_OFFSET_NAME(msr),
+	REG_OFFSET_NAME(ctr),
+	REG_OFFSET_NAME(link),
+	REG_OFFSET_NAME(xer),
+	REG_OFFSET_NAME(ccr),
+#ifdef CONFIG_PPC64
+	REG_OFFSET_NAME(softe),
+#else
+	REG_OFFSET_NAME(mq),
+#endif
+	REG_OFFSET_NAME(trap),
+	REG_OFFSET_NAME(dar),
+	REG_OFFSET_NAME(dsisr),
+	REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name:	the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+	const struct pt_regs_offset *roff;
+	for (roff = regoffset_table; roff->name != NULL; roff++)
+		if (!strcmp(roff->name, name))
+			return roff->offset;
+	return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset:	the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+	const struct pt_regs_offset *roff;
+	for (roff = regoffset_table; roff->name != NULL; roff++)
+		if (roff->offset == offset)
+			return roff->name;
+	return NULL;
+}
+
 /*
  * does not yet catch signals sent when the child dies.
  * in exit.c or in signal.c.
-- 
cgit v1.2.2


From 963cf3dc6342fe60bb78c615884537621abca0bc Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 19 Feb 2010 11:00:40 +0100
Subject: KVM: PPC: Add helpers to call FPU instructions

To emulate paired single instructions, we need to be able to call FPU
operations from within the kernel. Since we don't want gcc to spill
arbitrary FPU code everywhere, we tell it to use a soft fpu.

Since we know we can really call the FPU in safe areas, let's also add
some calls that we can later use to actually execute real world FPU
operations on the host's FPU.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kernel/ppc_ksyms.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index ab3e392ac63c..58fdb3a784de 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -101,6 +101,8 @@ EXPORT_SYMBOL(pci_dram_offset);
 EXPORT_SYMBOL(start_thread);
 EXPORT_SYMBOL(kernel_thread);
 
+EXPORT_SYMBOL_GPL(cvt_df);
+EXPORT_SYMBOL_GPL(cvt_fd);
 EXPORT_SYMBOL(giveup_fpu);
 #ifdef CONFIG_ALTIVEC
 EXPORT_SYMBOL(giveup_altivec);
-- 
cgit v1.2.2


From 4b83c330b4d38e869111bda6e9077d4f61ed974a Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Wed, 7 Apr 2010 15:33:44 +0000
Subject: powerpc/numa: Add form 1 NUMA affinity

Firmware changed the way it represents memory and cpu affinity on POWER7.
Unfortunately the old method now caps the topology to work around issues
with legacy operating systems. For Linux to get the correct topology we
need to use the new form 1 affinity information.

We set the form 1 field in the client architecture, and if we see "1" in the
ibm,associativity-form property firmware supports form 1 affinity and
we should look at the first field in the ibm,associativity-reference-points
array. If not we use the second field as we always have.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/prom_init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 5f306c4946e5..97d4bd9442d3 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -653,6 +653,7 @@ static void __init early_cmdline_parse(void)
 #else
 #define OV5_CMO			0x00
 #endif
+#define OV5_TYPE1_AFFINITY	0x80	/* Type 1 NUMA affinity */
 
 /* Option Vector 6: IBM PAPR hints */
 #define OV6_LINUX		0x02	/* Linux is our OS */
@@ -706,7 +707,7 @@ static unsigned char ibm_architecture_vec[] = {
 	OV5_DONATE_DEDICATE_CPU | OV5_MSI,
 	0,
 	OV5_CMO,
-	0,
+	OV5_TYPE1_AFFINITY,
 	0,
 	0,
 	0,
-- 
cgit v1.2.2


From b8b14c66765ccba884c5c4570bf8be361d211d95 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Mon, 3 May 2010 07:36:22 -0500
Subject: powerpc/swiotlb: Fix off by one in determining boundary of which ops
 to use

When we compare the devices DMA mask to the amount of memory we need to
make sure we treat the DMA mask as an address boundary.  For example if
the DMA_MASK(32) and we have 4G of memory we'd incorrectly set the dma
ops to swiotlb.  We need to add one to the dma mask when we convert it.

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/kernel/dma-swiotlb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 59c928564a03..4ff4da2c238b 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -1,7 +1,8 @@
 /*
  * Contains routines needed to support swiotlb for ppc.
  *
- * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor
+ * Copyright (C) 2009-2010 Freescale Semiconductor, Inc.
+ * Author: Becky Bruce
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -70,7 +71,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
 	sd->max_direct_dma_addr = 0;
 
 	/* May need to bounce if the device can't address all of DRAM */
-	if (dma_get_mask(dev) < lmb_end_of_DRAM())
+	if ((dma_get_mask(dev) + 1) < lmb_end_of_DRAM())
 		set_dma_ops(dev, &swiotlb_dma_ops);
 
 	return NOTIFY_DONE;
-- 
cgit v1.2.2


From 471c70ff39809af783c7718defe574a9ba81dd26 Mon Sep 17 00:00:00 2001
From: Torez Smith <lnxtorez@linux.vnet.ibm.com>
Date: Fri, 5 Mar 2010 10:43:01 +0000
Subject: powerpc/booke: Add Stack Marking support to Booke Exception Prolog

This patch adds a marker to the exception stack frame to aid in debugging.
It's already inserted on other platforms and xmon recognizes it and
identifies exception frames when showing stack traces.

Signed-off-by: Torez Smith  <lnxtorez@linux.vnet.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/head_booke.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 50504ae39cb7..a0bf158c8b47 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -1,6 +1,7 @@
 #ifndef __HEAD_BOOKE_H__
 #define __HEAD_BOOKE_H__
 
+#include <asm/ptrace.h>	/* for STACK_FRAME_REGS_MARKER */
 /*
  * Macros used for common Book-e exception handling
  */
@@ -48,6 +49,9 @@
 	stw	r10,0(r11);						     \
 	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
 	stw	r0,GPR0(r11);						     \
+	lis	r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \
+	addi	r10, r10, STACK_FRAME_REGS_MARKER@l;			     \
+	stw	r10, 8(r11);						     \
 	SAVE_4GPRS(3, r11);						     \
 	SAVE_2GPRS(7, r11)
 
-- 
cgit v1.2.2


From 795033c344d88dc6aa5106d0cc358656f29bd722 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Date: Fri, 5 Mar 2010 10:43:07 +0000
Subject: powerpc/44x: break out cpu init code into stand-alone function

The 47x platform supports multiple cores and shares code with 44x.
Break out code that is common for initializing the primary and secondary
cpus into a function which can be called for both.

Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/head_44x.S | 330 +++++++++++++++++++++--------------------
 1 file changed, 171 insertions(+), 159 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 711368b993f2..39be049a7850 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -69,165 +69,7 @@ _ENTRY(_start);
 	mr	r27,r7
 	li	r24,0		/* CPU number */
 
-/*
- * In case the firmware didn't do it, we apply some workarounds
- * that are good for all 440 core variants here
- */
-	mfspr	r3,SPRN_CCR0
-	rlwinm	r3,r3,0,0,27	/* disable icache prefetch */
-	isync
-	mtspr	SPRN_CCR0,r3
-	isync
-	sync
-
-/*
- * Set up the initial MMU state
- *
- * We are still executing code at the virtual address
- * mappings set by the firmware for the base of RAM.
- *
- * We first invalidate all TLB entries but the one
- * we are running from.  We then load the KERNELBASE
- * mappings so we can begin to use kernel addresses
- * natively and so the interrupt vector locations are
- * permanently pinned (necessary since Book E
- * implementations always have translation enabled).
- *
- * TODO: Use the known TLB entry we are running from to
- *	 determine which physical region we are located
- *	 in.  This can be used to determine where in RAM
- *	 (on a shared CPU system) or PCI memory space
- *	 (on a DRAMless system) we are located.
- *       For now, we assume a perfect world which means
- *	 we are located at the base of DRAM (physical 0).
- */
-
-/*
- * Search TLB for entry that we are currently using.
- * Invalidate all entries but the one we are using.
- */
-	/* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
-	mfspr	r3,SPRN_PID			/* Get PID */
-	mfmsr	r4				/* Get MSR */
-	andi.	r4,r4,MSR_IS@l			/* TS=1? */
-	beq	wmmucr				/* If not, leave STS=0 */
-	oris	r3,r3,PPC44x_MMUCR_STS@h	/* Set STS=1 */
-wmmucr:	mtspr	SPRN_MMUCR,r3			/* Put MMUCR */
-	sync
-
-	bl	invstr				/* Find our address */
-invstr:	mflr	r5				/* Make it accessible */
-	tlbsx	r23,0,r5			/* Find entry we are in */
-	li	r4,0				/* Start at TLB entry 0 */
-	li	r3,0				/* Set PAGEID inval value */
-1:	cmpw	r23,r4				/* Is this our entry? */
-	beq	skpinv				/* If so, skip the inval */
-	tlbwe	r3,r4,PPC44x_TLB_PAGEID		/* If not, inval the entry */
-skpinv:	addi	r4,r4,1				/* Increment */
-	cmpwi	r4,64				/* Are we done? */
-	bne	1b				/* If not, repeat */
-	isync					/* If so, context change */
-
-/*
- * Configure and load pinned entry into TLB slot 63.
- */
-
-	lis	r3,PAGE_OFFSET@h
-	ori	r3,r3,PAGE_OFFSET@l
-
-	/* Kernel is at the base of RAM */
-	li r4, 0			/* Load the kernel physical address */
-
-	/* Load the kernel PID = 0 */
-	li	r0,0
-	mtspr	SPRN_PID,r0
-	sync
-
-	/* Initialize MMUCR */
-	li	r5,0
-	mtspr	SPRN_MMUCR,r5
-	sync
-
- 	/* pageid fields */
-	clrrwi	r3,r3,10		/* Mask off the effective page number */
-	ori	r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M
-
-	/* xlat fields */
-	clrrwi	r4,r4,10		/* Mask off the real page number */
-					/* ERPN is 0 for first 4GB page */
-
-	/* attrib fields */
-	/* Added guarded bit to protect against speculative loads/stores */
-	li	r5,0
-	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
-
-        li      r0,63                    /* TLB slot 63 */
-
-	tlbwe	r3,r0,PPC44x_TLB_PAGEID	/* Load the pageid fields */
-	tlbwe	r4,r0,PPC44x_TLB_XLAT	/* Load the translation fields */
-	tlbwe	r5,r0,PPC44x_TLB_ATTRIB	/* Load the attrib/access fields */
-
-	/* Force context change */
-	mfmsr	r0
-	mtspr	SPRN_SRR1, r0
-	lis	r0,3f@h
-	ori	r0,r0,3f@l
-	mtspr	SPRN_SRR0,r0
-	sync
-	rfi
-
-	/* If necessary, invalidate original entry we used */
-3:	cmpwi	r23,63
-	beq	4f
-	li	r6,0
-	tlbwe   r6,r23,PPC44x_TLB_PAGEID
-	isync
-
-4:
-#ifdef CONFIG_PPC_EARLY_DEBUG_44x
-	/* Add UART mapping for early debug. */
-
- 	/* pageid fields */
-	lis	r3,PPC44x_EARLY_DEBUG_VIRTADDR@h
-	ori	r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K
-
-	/* xlat fields */
-	lis	r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h
-	ori	r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH
-
-	/* attrib fields */
-	li	r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G)
-        li      r0,62                    /* TLB slot 0 */
-
-	tlbwe	r3,r0,PPC44x_TLB_PAGEID
-	tlbwe	r4,r0,PPC44x_TLB_XLAT
-	tlbwe	r5,r0,PPC44x_TLB_ATTRIB
-
-	/* Force context change */
-	isync
-#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
-
-	/* Establish the interrupt vector offsets */
-	SET_IVOR(0,  CriticalInput);
-	SET_IVOR(1,  MachineCheck);
-	SET_IVOR(2,  DataStorage);
-	SET_IVOR(3,  InstructionStorage);
-	SET_IVOR(4,  ExternalInput);
-	SET_IVOR(5,  Alignment);
-	SET_IVOR(6,  Program);
-	SET_IVOR(7,  FloatingPointUnavailable);
-	SET_IVOR(8,  SystemCall);
-	SET_IVOR(9,  AuxillaryProcessorUnavailable);
-	SET_IVOR(10, Decrementer);
-	SET_IVOR(11, FixedIntervalTimer);
-	SET_IVOR(12, WatchdogTimer);
-	SET_IVOR(13, DataTLBError);
-	SET_IVOR(14, InstructionTLBError);
-	SET_IVOR(15, DebugCrit);
-
-	/* Establish the interrupt vector base */
-	lis	r4,interrupt_base@h	/* IVPR only uses the high 16-bits */
-	mtspr	SPRN_IVPR,r4
+	bl	init_cpu_state
 
 	/*
 	 * This is where the main kernel code starts.
@@ -646,6 +488,176 @@ _GLOBAL(set_context)
 	isync			/* Force context change */
 	blr
 
+/*
+ * Init CPU state. This is called at boot time or for secondary CPUs
+ * to setup initial TLB entries, setup IVORs, etc...
+ */
+_GLOBAL(init_cpu_state)
+	mflr	r22
+/*
+ * In case the firmware didn't do it, we apply some workarounds
+ * that are good for all 440 core variants here
+ */
+	mfspr	r3,SPRN_CCR0
+	rlwinm	r3,r3,0,0,27	/* disable icache prefetch */
+	isync
+	mtspr	SPRN_CCR0,r3
+	isync
+	sync
+
+/*
+ * Set up the initial MMU state
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ *
+ * We first invalidate all TLB entries but the one
+ * we are running from.  We then load the KERNELBASE
+ * mappings so we can begin to use kernel addresses
+ * natively and so the interrupt vector locations are
+ * permanently pinned (necessary since Book E
+ * implementations always have translation enabled).
+ *
+ * TODO: Use the known TLB entry we are running from to
+ *	 determine which physical region we are located
+ *	 in.  This can be used to determine where in RAM
+ *	 (on a shared CPU system) or PCI memory space
+ *	 (on a DRAMless system) we are located.
+ *       For now, we assume a perfect world which means
+ *	 we are located at the base of DRAM (physical 0).
+ */
+
+/*
+ * Search TLB for entry that we are currently using.
+ * Invalidate all entries but the one we are using.
+ */
+	/* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
+	mfspr	r3,SPRN_PID			/* Get PID */
+	mfmsr	r4				/* Get MSR */
+	andi.	r4,r4,MSR_IS@l			/* TS=1? */
+	beq	wmmucr				/* If not, leave STS=0 */
+	oris	r3,r3,PPC44x_MMUCR_STS@h	/* Set STS=1 */
+wmmucr:	mtspr	SPRN_MMUCR,r3			/* Put MMUCR */
+	sync
+
+	bl	invstr				/* Find our address */
+invstr:	mflr	r5				/* Make it accessible */
+	tlbsx	r23,0,r5			/* Find entry we are in */
+	li	r4,0				/* Start at TLB entry 0 */
+	li	r3,0				/* Set PAGEID inval value */
+1:	cmpw	r23,r4				/* Is this our entry? */
+	beq	skpinv				/* If so, skip the inval */
+	tlbwe	r3,r4,PPC44x_TLB_PAGEID		/* If not, inval the entry */
+skpinv:	addi	r4,r4,1				/* Increment */
+	cmpwi	r4,64				/* Are we done? */
+	bne	1b				/* If not, repeat */
+	isync					/* If so, context change */
+
+/*
+ * Configure and load pinned entry into TLB slot 63.
+ */
+
+	lis	r3,PAGE_OFFSET@h
+	ori	r3,r3,PAGE_OFFSET@l
+
+	/* Kernel is at the base of RAM */
+	li r4, 0			/* Load the kernel physical address */
+
+	/* Load the kernel PID = 0 */
+	li	r0,0
+	mtspr	SPRN_PID,r0
+	sync
+
+	/* Initialize MMUCR */
+	li	r5,0
+	mtspr	SPRN_MMUCR,r5
+	sync
+
+	/* pageid fields */
+	clrrwi	r3,r3,10		/* Mask off the effective page number */
+	ori	r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M
+
+	/* xlat fields */
+	clrrwi	r4,r4,10		/* Mask off the real page number */
+					/* ERPN is 0 for first 4GB page */
+
+	/* attrib fields */
+	/* Added guarded bit to protect against speculative loads/stores */
+	li	r5,0
+	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+        li      r0,63                    /* TLB slot 63 */
+
+	tlbwe	r3,r0,PPC44x_TLB_PAGEID	/* Load the pageid fields */
+	tlbwe	r4,r0,PPC44x_TLB_XLAT	/* Load the translation fields */
+	tlbwe	r5,r0,PPC44x_TLB_ATTRIB	/* Load the attrib/access fields */
+
+	/* Force context change */
+	mfmsr	r0
+	mtspr	SPRN_SRR1, r0
+	lis	r0,3f@h
+	ori	r0,r0,3f@l
+	mtspr	SPRN_SRR0,r0
+	sync
+	rfi
+
+	/* If necessary, invalidate original entry we used */
+3:	cmpwi	r23,63
+	beq	4f
+	li	r6,0
+	tlbwe   r6,r23,PPC44x_TLB_PAGEID
+	isync
+
+4:
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+	/* Add UART mapping for early debug. */
+
+	/* pageid fields */
+	lis	r3,PPC44x_EARLY_DEBUG_VIRTADDR@h
+	ori	r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K
+
+	/* xlat fields */
+	lis	r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h
+	ori	r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH
+
+	/* attrib fields */
+	li	r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G)
+        li      r0,62                    /* TLB slot 0 */
+
+	tlbwe	r3,r0,PPC44x_TLB_PAGEID
+	tlbwe	r4,r0,PPC44x_TLB_XLAT
+	tlbwe	r5,r0,PPC44x_TLB_ATTRIB
+
+	/* Force context change */
+	isync
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+	/* Establish the interrupt vector offsets */
+	SET_IVOR(0,  CriticalInput);
+	SET_IVOR(1,  MachineCheck);
+	SET_IVOR(2,  DataStorage);
+	SET_IVOR(3,  InstructionStorage);
+	SET_IVOR(4,  ExternalInput);
+	SET_IVOR(5,  Alignment);
+	SET_IVOR(6,  Program);
+	SET_IVOR(7,  FloatingPointUnavailable);
+	SET_IVOR(8,  SystemCall);
+	SET_IVOR(9,  AuxillaryProcessorUnavailable);
+	SET_IVOR(10, Decrementer);
+	SET_IVOR(11, FixedIntervalTimer);
+	SET_IVOR(12, WatchdogTimer);
+	SET_IVOR(13, DataTLBError);
+	SET_IVOR(14, InstructionTLBError);
+	SET_IVOR(15, DebugCrit);
+
+	/* Establish the interrupt vector base */
+	lis	r4,interrupt_base@h	/* IVPR only uses the high 16-bits */
+	mtspr	SPRN_IVPR,r4
+
+	addis	r22,r22,KERNELBASE@h
+	mtlr	r22
+	blr
+
 /*
  * We put a few things here that have to be page-aligned. This stuff
  * goes at the beginning of the data segment, which is page-aligned.
-- 
cgit v1.2.2


From e7f75ad01d590243904c2d95ab47e6b2e9ef6dad Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Date: Fri, 5 Mar 2010 10:43:12 +0000
Subject: powerpc/47x: Base ppc476 support

This patch adds the base support for the 476 processor.  The code was
primarily written by Ben Herrenschmidt and Torez Smith, but I've been
maintaining it for a while.

The goal is to have a single binary that will run on 44x and 47x, but
we still have some details to work out.  The biggest is that the L1 cache
line size differs on the two platforms, but it's currently a compile-time
option.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Torez Smith  <lnxtorez@linux.vnet.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/cputable.c |  13 ++
 arch/powerpc/kernel/entry_32.S |   5 +
 arch/powerpc/kernel/head_44x.S | 502 +++++++++++++++++++++++++++++++++++++++--
 arch/powerpc/kernel/misc_32.S  |   9 +-
 arch/powerpc/kernel/smp.c      |   8 +
 5 files changed, 519 insertions(+), 18 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 8af4949434b2..a1d845839727 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1701,6 +1701,19 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check		= machine_check_440A,
 		.platform		= "ppc440",
 	},
+	{ /* 476 core */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x11a50000,
+		.cpu_name		= "476",
+		.cpu_features		= CPU_FTRS_47X,
+		.cpu_user_features	= COMMON_USER_BOOKE |
+			PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_47x |
+			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.platform		= "ppc470",
+	},
 	{	/* default match */
 		.pvr_mask		= 0x00000000,
 		.pvr_value		= 0x00000000,
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 1175a8539e6c..ed4aeb96398b 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -373,11 +373,13 @@ syscall_exit_cont:
 	bnel-	load_dbcr0
 #endif
 #ifdef CONFIG_44x
+BEGIN_MMU_FTR_SECTION
 	lis	r4,icache_44x_need_flush@ha
 	lwz	r5,icache_44x_need_flush@l(r4)
 	cmplwi	cr0,r5,0
 	bne-	2f
 1:
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x)
 #endif /* CONFIG_44x */
 BEGIN_FTR_SECTION
 	lwarx	r7,0,r1
@@ -848,6 +850,9 @@ resume_kernel:
 	/* interrupts are hard-disabled at this point */
 restore:
 #ifdef CONFIG_44x
+BEGIN_MMU_FTR_SECTION
+	b	1f
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
 	lis	r4,icache_44x_need_flush@ha
 	lwz	r5,icache_44x_need_flush@l(r4)
 	cmplwi	cr0,r5,0
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 39be049a7850..1acd175428c4 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -37,6 +37,7 @@
 #include <asm/thread_info.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
+#include <asm/synch.h>
 #include "head_booke.h"
 
 
@@ -191,7 +192,7 @@ interrupt_base:
 #endif
 
 	/* Data TLB Error Interrupt */
-	START_EXCEPTION(DataTLBError)
+	START_EXCEPTION(DataTLBError44x)
 	mtspr	SPRN_SPRG_WSCRATCH0, r10		/* Save some working registers */
 	mtspr	SPRN_SPRG_WSCRATCH1, r11
 	mtspr	SPRN_SPRG_WSCRATCH2, r12
@@ -282,7 +283,7 @@ tlb_44x_patch_hwater_D:
 	mfspr	r10,SPRN_DEAR
 
 	 /* Jump to common tlb load */
-	b	finish_tlb_load
+	b	finish_tlb_load_44x
 
 2:
 	/* The bailout.  Restore registers to pre-exception conditions
@@ -302,7 +303,7 @@ tlb_44x_patch_hwater_D:
 	 * information from different registers and bailout
 	 * to a different point.
 	 */
-	START_EXCEPTION(InstructionTLBError)
+	START_EXCEPTION(InstructionTLBError44x)
 	mtspr	SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
 	mtspr	SPRN_SPRG_WSCRATCH1, r11
 	mtspr	SPRN_SPRG_WSCRATCH2, r12
@@ -378,7 +379,7 @@ tlb_44x_patch_hwater_I:
 	mfspr	r10,SPRN_SRR0
 
 	/* Jump to common TLB load point */
-	b	finish_tlb_load
+	b	finish_tlb_load_44x
 
 2:
 	/* The bailout.  Restore registers to pre-exception conditions
@@ -392,15 +393,7 @@ tlb_44x_patch_hwater_I:
 	mfspr	r10, SPRN_SPRG_RSCRATCH0
 	b	InstructionStorage
 
-	/* Debug Interrupt */
-	DEBUG_CRIT_EXCEPTION
-
-/*
- * Local functions
-  */
-
 /*
-
  * Both the instruction and data TLB miss get to this
  * point to load the TLB.
  * 	r10 - EA of fault
@@ -410,7 +403,7 @@ tlb_44x_patch_hwater_I:
  *	MMUCR - loaded with proper value when we get here
  *	Upon exit, we reload everything and RFI.
  */
-finish_tlb_load:
+finish_tlb_load_44x:
 	/* Combine RPN & ERPN an write WS 0 */
 	rlwimi	r11,r12,0,0,31-PAGE_SHIFT
 	tlbwe	r11,r13,PPC44x_TLB_XLAT
@@ -443,6 +436,227 @@ finish_tlb_load:
 	mfspr	r10, SPRN_SPRG_RSCRATCH0
 	rfi					/* Force context change */
 
+/* TLB error interrupts for 476
+ */
+#ifdef CONFIG_PPC_47x
+	START_EXCEPTION(DataTLBError47x)
+	mtspr	SPRN_SPRG_WSCRATCH0,r10	/* Save some working registers */
+	mtspr	SPRN_SPRG_WSCRATCH1,r11
+	mtspr	SPRN_SPRG_WSCRATCH2,r12
+	mtspr	SPRN_SPRG_WSCRATCH3,r13
+	mfcr	r11
+	mtspr	SPRN_SPRG_WSCRATCH4,r11
+	mfspr	r10,SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11,PAGE_OFFSET@h
+	cmplw	cr0,r10,r11
+	blt+	3f
+	lis	r11,swapper_pg_dir@h
+	ori	r11,r11, swapper_pg_dir@l
+	li	r12,0			/* MMUCR = 0 */
+	b	4f
+
+	/* Get the PGD for the current thread and setup MMUCR */
+3:	mfspr	r11,SPRN_SPRG3
+	lwz	r11,PGDIR(r11)
+	mfspr   r12,SPRN_PID		/* Get PID */
+4:	mtspr	SPRN_MMUCR,r12		/* Set MMUCR */
+
+	/* Mask of required permission bits. Note that while we
+	 * do copy ESR:ST to _PAGE_RW position as trying to write
+	 * to an RO page is pretty common, we don't do it with
+	 * _PAGE_DIRTY. We could do it, but it's a fairly rare
+	 * event so I'd rather take the overhead when it happens
+	 * rather than adding an instruction here. We should measure
+	 * whether the whole thing is worth it in the first place
+	 * as we could avoid loading SPRN_ESR completely in the first
+	 * place...
+	 *
+	 * TODO: Is it worth doing that mfspr & rlwimi in the first
+	 *       place or can we save a couple of instructions here ?
+	 */
+	mfspr	r12,SPRN_ESR
+	li	r13,_PAGE_PRESENT|_PAGE_ACCESSED
+	rlwimi	r13,r12,10,30,30
+
+	/* Load the PTE */
+	/* Compute pgdir/pmd offset */
+	rlwinm  r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29
+	lwzx	r11,r12,r11		/* Get pgd/pmd entry */
+
+	/* Word 0 is EPN,V,TS,DSIZ */
+	li	r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE
+	rlwimi	r10,r12,0,32-PAGE_SHIFT,31	/* Insert valid and page size*/
+	li	r12,0
+	tlbwe	r10,r12,0
+
+	/* XXX can we do better ? Need to make sure tlbwe has established
+	 * latch V bit in MMUCR0 before the PTE is loaded further down */
+#ifdef CONFIG_SMP
+	isync
+#endif
+
+	rlwinm.	r12,r11,0,0,20		/* Extract pt base address */
+	/* Compute pte address */
+	rlwimi  r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28
+	beq	2f			/* Bail if no table */
+	lwz	r11,0(r12)		/* Get high word of pte entry */
+
+	/* XXX can we do better ? maybe insert a known 0 bit from r11 into the
+	 * bottom of r12 to create a data dependency... We can also use r10
+	 * as destination nowadays
+	 */
+#ifdef CONFIG_SMP
+	lwsync
+#endif
+	lwz	r12,4(r12)		/* Get low word of pte entry */
+
+	andc.	r13,r13,r12		/* Check permission */
+
+	 /* Jump to common tlb load */
+	beq	finish_tlb_load_47x
+
+2:	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11,SPRN_SPRG_RSCRATCH4
+	mtcr	r11
+	mfspr	r13,SPRN_SPRG_RSCRATCH3
+	mfspr	r12,SPRN_SPRG_RSCRATCH2
+	mfspr	r11,SPRN_SPRG_RSCRATCH1
+	mfspr	r10,SPRN_SPRG_RSCRATCH0
+	b	DataStorage
+
+	/* Instruction TLB Error Interrupt */
+	/*
+	 * Nearly the same as above, except we get our
+	 * information from different registers and bailout
+	 * to a different point.
+	 */
+	START_EXCEPTION(InstructionTLBError47x)
+	mtspr	SPRN_SPRG_WSCRATCH0,r10	/* Save some working registers */
+	mtspr	SPRN_SPRG_WSCRATCH1,r11
+	mtspr	SPRN_SPRG_WSCRATCH2,r12
+	mtspr	SPRN_SPRG_WSCRATCH3,r13
+	mfcr	r11
+	mtspr	SPRN_SPRG_WSCRATCH4,r11
+	mfspr	r10,SPRN_SRR0		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11,PAGE_OFFSET@h
+	cmplw	cr0,r10,r11
+	blt+	3f
+	lis	r11,swapper_pg_dir@h
+	ori	r11,r11, swapper_pg_dir@l
+	li	r12,0			/* MMUCR = 0 */
+	b	4f
+
+	/* Get the PGD for the current thread and setup MMUCR */
+3:	mfspr	r11,SPRN_SPRG_THREAD
+	lwz	r11,PGDIR(r11)
+	mfspr   r12,SPRN_PID		/* Get PID */
+4:	mtspr	SPRN_MMUCR,r12		/* Set MMUCR */
+
+	/* Make up the required permissions */
+	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+
+	/* Load PTE */
+	/* Compute pgdir/pmd offset */
+	rlwinm  r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29
+	lwzx	r11,r12,r11		/* Get pgd/pmd entry */
+
+	/* Word 0 is EPN,V,TS,DSIZ */
+	li	r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE
+	rlwimi	r10,r12,0,32-PAGE_SHIFT,31	/* Insert valid and page size*/
+	li	r12,0
+	tlbwe	r10,r12,0
+
+	/* XXX can we do better ? Need to make sure tlbwe has established
+	 * latch V bit in MMUCR0 before the PTE is loaded further down */
+#ifdef CONFIG_SMP
+	isync
+#endif
+
+	rlwinm.	r12,r11,0,0,20		/* Extract pt base address */
+	/* Compute pte address */
+	rlwimi  r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28
+	beq	2f			/* Bail if no table */
+
+	lwz	r11,0(r12)		/* Get high word of pte entry */
+	/* XXX can we do better ? maybe insert a known 0 bit from r11 into the
+	 * bottom of r12 to create a data dependency... We can also use r10
+	 * as destination nowadays
+	 */
+#ifdef CONFIG_SMP
+	lwsync
+#endif
+	lwz	r12,4(r12)		/* Get low word of pte entry */
+
+	andc.	r13,r13,r12		/* Check permission */
+
+	/* Jump to common TLB load point */
+	beq	finish_tlb_load_47x
+
+2:	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11, SPRN_SPRG_RSCRATCH4
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG_RSCRATCH3
+	mfspr	r12, SPRN_SPRG_RSCRATCH2
+	mfspr	r11, SPRN_SPRG_RSCRATCH1
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	b	InstructionStorage
+
+/*
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ * 	r10 - free to use
+ * 	r11 - PTE high word value
+ *	r12 - PTE low word value
+ *      r13 - free to use
+ *	MMUCR - loaded with proper value when we get here
+ *	Upon exit, we reload everything and RFI.
+ */
+finish_tlb_load_47x:
+	/* Combine RPN & ERPN an write WS 1 */
+	rlwimi	r11,r12,0,0,31-PAGE_SHIFT
+	tlbwe	r11,r13,1
+
+	/* And make up word 2 */
+	li	r10,0xf85			/* Mask to apply from PTE */
+	rlwimi	r10,r12,29,30,30		/* DIRTY -> SW position */
+	and	r11,r12,r10			/* Mask PTE bits to keep */
+	andi.	r10,r12,_PAGE_USER		/* User page ? */
+	beq	1f				/* nope, leave U bits empty */
+	rlwimi	r11,r11,3,26,28			/* yes, copy S bits to U */
+1:	tlbwe	r11,r13,2
+
+	/* Done...restore registers and get out of here.
+	*/
+	mfspr	r11, SPRN_SPRG_RSCRATCH4
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG_RSCRATCH3
+	mfspr	r12, SPRN_SPRG_RSCRATCH2
+	mfspr	r11, SPRN_SPRG_RSCRATCH1
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	rfi
+
+#endif /* CONFIG_PPC_47x */
+
+	/* Debug Interrupt */
+	/*
+	 * This statement needs to exist at the end of the IVPR
+	 * definition just in case you end up taking a debug
+	 * exception within another exception.
+	 */
+	DEBUG_CRIT_EXCEPTION
+
 /*
  * Global functions
  */
@@ -491,9 +705,18 @@ _GLOBAL(set_context)
 /*
  * Init CPU state. This is called at boot time or for secondary CPUs
  * to setup initial TLB entries, setup IVORs, etc...
+ *
  */
 _GLOBAL(init_cpu_state)
 	mflr	r22
+#ifdef CONFIG_PPC_47x
+	/* We use the PVR to differenciate 44x cores from 476 */
+	mfspr	r3,SPRN_PVR
+	srwi	r3,r3,16
+	cmplwi	cr0,r3,PVR_476@h
+	beq	head_start_47x
+#endif /* CONFIG_PPC_47x */
+
 /*
  * In case the firmware didn't do it, we apply some workarounds
  * that are good for all 440 core variants here
@@ -506,7 +729,7 @@ _GLOBAL(init_cpu_state)
 	sync
 
 /*
- * Set up the initial MMU state
+ * Set up the initial MMU state for 44x
  *
  * We are still executing code at the virtual address
  * mappings set by the firmware for the base of RAM.
@@ -646,16 +869,257 @@ skpinv:	addi	r4,r4,1				/* Increment */
 	SET_IVOR(10, Decrementer);
 	SET_IVOR(11, FixedIntervalTimer);
 	SET_IVOR(12, WatchdogTimer);
-	SET_IVOR(13, DataTLBError);
-	SET_IVOR(14, InstructionTLBError);
+	SET_IVOR(13, DataTLBError44x);
+	SET_IVOR(14, InstructionTLBError44x);
 	SET_IVOR(15, DebugCrit);
 
+	b	head_start_common
+
+
+#ifdef CONFIG_PPC_47x
+
+#ifdef CONFIG_SMP
+
+/* Entry point for secondary 47x processors */
+_GLOBAL(start_secondary_47x)
+        mr      r24,r3          /* CPU number */
+
+	bl	init_cpu_state
+
+	/* Now we need to bolt the rest of kernel memory which
+	 * is done in C code. We must be careful because our task
+	 * struct or our stack can (and will probably) be out
+	 * of reach of the initial 256M TLB entry, so we use a
+	 * small temporary stack in .bss for that. This works
+	 * because only one CPU at a time can be in this code
+	 */
+	lis	r1,temp_boot_stack@h
+	ori	r1,r1,temp_boot_stack@l
+	addi	r1,r1,1024-STACK_FRAME_OVERHEAD
+	li	r0,0
+	stw	r0,0(r1)
+	bl	mmu_init_secondary
+
+	/* Now we can get our task struct and real stack pointer */
+
+	/* Get current_thread_info and current */
+	lis	r1,secondary_ti@ha
+	lwz	r1,secondary_ti@l(r1)
+	lwz	r2,TI_TASK(r1)
+
+	/* Current stack pointer */
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	li	r0,0
+	stw	r0,0(r1)
+
+	/* Kernel stack for exception entry in SPRG3 */
+	addi	r4,r2,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG3,r4
+
+	b	start_secondary
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Set up the initial MMU state for 44x
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ */
+
+head_start_47x:
+	/* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
+	mfspr	r3,SPRN_PID			/* Get PID */
+	mfmsr	r4				/* Get MSR */
+	andi.	r4,r4,MSR_IS@l			/* TS=1? */
+	beq	1f				/* If not, leave STS=0 */
+	oris	r3,r3,PPC47x_MMUCR_STS@h	/* Set STS=1 */
+1:	mtspr	SPRN_MMUCR,r3			/* Put MMUCR */
+	sync
+
+	/* Find the entry we are running from */
+	bl	1f
+1:	mflr	r23
+	tlbsx	r23,0,r23
+	tlbre	r24,r23,0
+	tlbre	r25,r23,1
+	tlbre	r26,r23,2
+
+/*
+ * Cleanup time
+ */
+
+	/* Initialize MMUCR */
+	li	r5,0
+	mtspr	SPRN_MMUCR,r5
+	sync
+
+clear_all_utlb_entries:
+
+	#; Set initial values.
+
+	addis		r3,0,0x8000
+	addi		r4,0,0
+	addi		r5,0,0
+	b		clear_utlb_entry
+
+	#; Align the loop to speed things up.
+
+	.align		6
+
+clear_utlb_entry:
+
+	tlbwe		r4,r3,0
+	tlbwe		r5,r3,1
+	tlbwe		r5,r3,2
+	addis		r3,r3,0x2000
+	cmpwi		r3,0
+	bne		clear_utlb_entry
+	addis		r3,0,0x8000
+	addis		r4,r4,0x100
+	cmpwi		r4,0
+	bne		clear_utlb_entry
+
+	#; Restore original entry.
+
+	oris	r23,r23,0x8000  /* specify the way */
+	tlbwe		r24,r23,0
+	tlbwe		r25,r23,1
+	tlbwe		r26,r23,2
+
+/*
+ * Configure and load pinned entry into TLB for the kernel core
+ */
+
+	lis	r3,PAGE_OFFSET@h
+	ori	r3,r3,PAGE_OFFSET@l
+
+	/* Kernel is at the base of RAM */
+	li r4, 0			/* Load the kernel physical address */
+
+	/* Load the kernel PID = 0 */
+	li	r0,0
+	mtspr	SPRN_PID,r0
+	sync
+
+	/* Word 0 */
+	clrrwi	r3,r3,12		/* Mask off the effective page number */
+	ori	r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_256M
+
+	/* Word 1 */
+	clrrwi	r4,r4,12		/* Mask off the real page number */
+					/* ERPN is 0 for first 4GB page */
+	/* Word 2 */
+	li	r5,0
+	ori	r5,r5,PPC47x_TLB2_S_RWX
+#ifdef CONFIG_SMP
+	ori	r5,r5,PPC47x_TLB2_M
+#endif
+
+	/* We write to way 0 and bolted 0 */
+	lis	r0,0x8800
+	tlbwe	r3,r0,0
+	tlbwe	r4,r0,1
+	tlbwe	r5,r0,2
+
+/*
+ * Configure SSPCR, ISPCR and USPCR for now to search everything, we can fix
+ * them up later
+ */
+	LOAD_REG_IMMEDIATE(r3, 0x9abcdef0)
+	mtspr	SPRN_SSPCR,r3
+	mtspr	SPRN_USPCR,r3
+	LOAD_REG_IMMEDIATE(r3, 0x12345670)
+	mtspr	SPRN_ISPCR,r3
+
+	/* Force context change */
+	mfmsr	r0
+	mtspr	SPRN_SRR1, r0
+	lis	r0,3f@h
+	ori	r0,r0,3f@l
+	mtspr	SPRN_SRR0,r0
+	sync
+	rfi
+
+	/* Invalidate original entry we used */
+3:
+	rlwinm	r24,r24,0,21,19 /* clear the "valid" bit */
+	tlbwe	r24,r23,0
+	addi	r24,0,0
+	tlbwe	r24,r23,1
+	tlbwe	r24,r23,2
+	isync                   /* Clear out the shadow TLB entries */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+	/* Add UART mapping for early debug. */
+
+	/* Word 0 */
+	lis	r3,PPC44x_EARLY_DEBUG_VIRTADDR@h
+	ori	r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_TS | PPC47x_TLB0_1M
+
+	/* Word 1 */
+	lis	r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h
+	ori	r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH
+
+	/* Word 2 */
+	li	r5,(PPC47x_TLB2_S_RW | PPC47x_TLB2_IMG)
+
+	/* Bolted in way 0, bolt slot 5, we -hope- we don't hit the same
+	 * congruence class as the kernel, we need to make sure of it at
+	 * some point
+	 */
+        lis	r0,0x8d00
+	tlbwe	r3,r0,0
+	tlbwe	r4,r0,1
+	tlbwe	r5,r0,2
+
+	/* Force context change */
+	isync
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+	/* Establish the interrupt vector offsets */
+	SET_IVOR(0,  CriticalInput);
+	SET_IVOR(1,  MachineCheckA);
+	SET_IVOR(2,  DataStorage);
+	SET_IVOR(3,  InstructionStorage);
+	SET_IVOR(4,  ExternalInput);
+	SET_IVOR(5,  Alignment);
+	SET_IVOR(6,  Program);
+	SET_IVOR(7,  FloatingPointUnavailable);
+	SET_IVOR(8,  SystemCall);
+	SET_IVOR(9,  AuxillaryProcessorUnavailable);
+	SET_IVOR(10, Decrementer);
+	SET_IVOR(11, FixedIntervalTimer);
+	SET_IVOR(12, WatchdogTimer);
+	SET_IVOR(13, DataTLBError47x);
+	SET_IVOR(14, InstructionTLBError47x);
+	SET_IVOR(15, DebugCrit);
+
+	/* We configure icbi to invalidate 128 bytes at a time since the
+	 * current 32-bit kernel code isn't too happy with icache != dcache
+	 * block size
+	 */
+	mfspr	r3,SPRN_CCR0
+	oris	r3,r3,0x0020
+	mtspr	SPRN_CCR0,r3
+	isync
+
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Here we are back to code that is common between 44x and 47x
+ *
+ * We proceed to further kernel initialization and return to the
+ * main kernel entry
+ */
+head_start_common:
 	/* Establish the interrupt vector base */
 	lis	r4,interrupt_base@h	/* IVPR only uses the high 16-bits */
 	mtspr	SPRN_IVPR,r4
 
 	addis	r22,r22,KERNELBASE@h
 	mtlr	r22
+	isync
 	blr
 
 /*
@@ -683,3 +1147,9 @@ swapper_pg_dir:
  */
 abatron_pteptrs:
 	.space	8
+
+#ifdef CONFIG_SMP
+	.align	12
+temp_boot_stack:
+	.space	1024
+#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 8649f536f8df..8043d1b73cf0 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -441,7 +441,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
 	addi	r3,r3,L1_CACHE_BYTES
 	bdnz	0b
 	sync
-#ifndef CONFIG_44x
+#ifdef CONFIG_44x
 	/* We don't flush the icache on 44x. Those have a virtual icache
 	 * and we don't have access to the virtual address here (it's
 	 * not the page vaddr but where it's mapped in user space). The
@@ -449,15 +449,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
 	 * a change in the address space occurs, before returning to
 	 * user space
 	 */
+BEGIN_MMU_FTR_SECTION
+	blr
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
+#endif /* CONFIG_44x */
 	mtctr	r4
 1:	icbi	0,r6
 	addi	r6,r6,L1_CACHE_BYTES
 	bdnz	1b
 	sync
 	isync
-#endif /* CONFIG_44x */
 	blr
 
+#ifndef CONFIG_BOOKE
 /*
  * Flush a particular page from the data cache to RAM, identified
  * by its physical address.  We turn off the MMU so we can just use
@@ -490,6 +494,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
 	mtmsr	r10				/* restore DR */
 	isync
 	blr
+#endif /* CONFIG_BOOKE */
 
 /*
  * Clear pages using the dcbz instruction, which doesn't cause any
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e36f94f7411a..3fe4de2b685e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -495,6 +495,14 @@ int __devinit start_secondary(void *unused)
 	current->active_mm = &init_mm;
 
 	smp_store_cpu_info(cpu);
+
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+	/* Clear any pending timer interrupts */
+	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
+
+	/* Enable decrementer interrupt */
+	mtspr(SPRN_TCR, TCR_DIE);
+#endif
 	set_dec(tb_ticks_per_jiffy);
 	preempt_disable();
 	cpu_callin_map[cpu] = 1;
-- 
cgit v1.2.2


From fc5e709731429bc2db27897630e7c0089f297680 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Date: Fri, 5 Mar 2010 03:43:18 +0000
Subject: powerpc/476: add machine check handler for 47x core

The 47x core's MCSR varies from 44x, so it needs it's own machine check
handler.

Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/cputable.c |  1 +
 arch/powerpc/kernel/traps.c    | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index a1d845839727..ad6baf834a76 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1712,6 +1712,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
 		.icache_bsize		= 32,
 		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
 		.platform		= "ppc470",
 	},
 	{	/* default match */
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 29d128eb6c43..ca7ce85ebc2e 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -380,6 +380,46 @@ int machine_check_440A(struct pt_regs *regs)
 	}
 	return 0;
 }
+
+int machine_check_47x(struct pt_regs *regs)
+{
+	unsigned long reason = get_mc_reason(regs);
+	u32 mcsr;
+
+	printk(KERN_ERR "Machine check in kernel mode.\n");
+	if (reason & ESR_IMCP) {
+		printk(KERN_ERR
+		       "Instruction Synchronous Machine Check exception\n");
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+		return 0;
+	}
+	mcsr = mfspr(SPRN_MCSR);
+	if (mcsr & MCSR_IB)
+		printk(KERN_ERR "Instruction Read PLB Error\n");
+	if (mcsr & MCSR_DRB)
+		printk(KERN_ERR "Data Read PLB Error\n");
+	if (mcsr & MCSR_DWB)
+		printk(KERN_ERR "Data Write PLB Error\n");
+	if (mcsr & MCSR_TLBP)
+		printk(KERN_ERR "TLB Parity Error\n");
+	if (mcsr & MCSR_ICP) {
+		flush_instruction_cache();
+		printk(KERN_ERR "I-Cache Parity Error\n");
+	}
+	if (mcsr & MCSR_DCSP)
+		printk(KERN_ERR "D-Cache Search Parity Error\n");
+	if (mcsr & PPC47x_MCSR_GPR)
+		printk(KERN_ERR "GPR Parity Error\n");
+	if (mcsr & PPC47x_MCSR_FPR)
+		printk(KERN_ERR "FPR Parity Error\n");
+	if (mcsr & PPC47x_MCSR_IPR)
+		printk(KERN_ERR "Machine Check exception is imprecise\n");
+
+	/* Clear MCSR */
+	mtspr(SPRN_MCSR, mcsr);
+
+	return 0;
+}
 #elif defined(CONFIG_E500)
 int machine_check_e500(struct pt_regs *regs)
 {
-- 
cgit v1.2.2


From 221c185d4e11b4061409da5d592779ced484614c Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Date: Fri, 5 Mar 2010 10:43:24 +0000
Subject: powerpc/476: Add isync after loading mmu and debug spr's

476 requires an isync after loading MMU and debug related SPR's.  Some of
these are in performance-critical paths and may need to be optimized, but
initially, we're playing it safe.

Signed-off-by: Torez Smith  <lnxtorez@linux.vnet.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/kprobes.c | 3 +++
 arch/powerpc/kernel/process.c | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index b36f074524ad..c533525ca56a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -114,6 +114,9 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	regs->msr &= ~MSR_CE;
 	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+#ifdef CONFIG_PPC_47x
+	isync();
+#endif
 #endif
 
 	/*
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e4d71ced97ef..9d255b4f0a0e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -371,6 +371,9 @@ int set_dabr(unsigned long dabr)
 	/* XXX should we have a CPU_FTR_HAS_DABR ? */
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	mtspr(SPRN_DAC1, dabr);
+#ifdef CONFIG_PPC_47x
+	isync();
+#endif
 #elif defined(CONFIG_PPC_BOOK3S)
 	mtspr(SPRN_DABR, dabr);
 #endif
-- 
cgit v1.2.2


From b4e8c8dd8456c1d3685fb5b715c9795d250f500e Mon Sep 17 00:00:00 2001
From: Torez Smith <lnxtorez@linux.vnet.ibm.com>
Date: Fri, 5 Mar 2010 10:45:54 +0000
Subject: powerpc/4xx: Simple platform for the ISS 4xx simulator

This is a trivial 4xx plaform that uses the new simple bsp from
Josh and is handy to use in simulators such as ISS or even Mambo
who don't properly implement most of the actual devices in the
SoC but really only the core.

Signed-off-by: Torez Smith  <lnxtorez@linux.vnet.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/cputable.c | 15 +++++++++++++++
 arch/powerpc/kernel/head_44x.S |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index ad6baf834a76..9556be903e96 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1715,6 +1715,21 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check		= machine_check_47x,
 		.platform		= "ppc470",
 	},
+	{ /* 476 iss */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00050000,
+		.cpu_name		= "476",
+		.cpu_features		= CPU_FTRS_47X,
+		.cpu_user_features	= COMMON_USER_BOOKE |
+			PPC_FEATURE_HAS_FPU,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_47x |
+			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
+		.platform		= "ppc470",
+	},
 	{	/* default match */
 		.pvr_mask		= 0x00000000,
 		.pvr_value		= 0x00000000,
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 1acd175428c4..5ab484ef06a7 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -715,6 +715,8 @@ _GLOBAL(init_cpu_state)
 	srwi	r3,r3,16
 	cmplwi	cr0,r3,PVR_476@h
 	beq	head_start_47x
+	cmplwi	cr0,r3,PVR_476_ISS@h
+	beq	head_start_47x
 #endif /* CONFIG_PPC_47x */
 
 /*
-- 
cgit v1.2.2


From d5f86fe3457f48f27eecd40c605e7876d026af7c Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 26 Apr 2010 15:32:33 +0000
Subject: powerpc/cpumask: Convert rtasd to new cpumask API

Use cpumask_first, cpumask_next in rtasd code.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/rtasd.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 4190eae7850a..e907ca66f75a 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -411,9 +411,9 @@ static void rtas_event_scan(struct work_struct *w)
 
 	get_online_cpus();
 
-	cpu = next_cpu(smp_processor_id(), cpu_online_map);
-	if (cpu == NR_CPUS) {
-		cpu = first_cpu(cpu_online_map);
+	cpu = cpumask_next(smp_processor_id(), cpu_online_mask);
+        if (cpu >= nr_cpu_ids) {
+		cpu = cpumask_first(cpu_online_mask);
 
 		if (first_pass) {
 			first_pass = 0;
@@ -466,8 +466,8 @@ static void start_event_scan(void)
 	/* Retreive errors from nvram if any */
 	retreive_nvram_error_log();
 
-	schedule_delayed_work_on(first_cpu(cpu_online_map), &event_scan_work,
-				 event_scan_delay);
+	schedule_delayed_work_on(cpumask_first(cpu_online_mask),
+				 &event_scan_work, event_scan_delay);
 }
 
 static int __init rtas_init(void)
-- 
cgit v1.2.2


From bfb9126defa80cbed6d91ed9685b238b0d7e81c4 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 26 Apr 2010 15:32:34 +0000
Subject: powerpc/cpumask: Convert smp_cpus_done to new cpumask API

Use the new cpumask_* functions and dynamically allocate the cpumask in
smp_cpus_done.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/smp.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 3fe4de2b685e..17523a0abf66 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -562,19 +562,22 @@ int setup_profiling_timer(unsigned int multiplier)
 
 void __init smp_cpus_done(unsigned int max_cpus)
 {
-	cpumask_t old_mask;
+	cpumask_var_t old_mask;
 
 	/* We want the setup_cpu() here to be called from CPU 0, but our
 	 * init thread may have been "borrowed" by another CPU in the meantime
 	 * se we pin us down to CPU 0 for a short while
 	 */
-	old_mask = current->cpus_allowed;
+	alloc_cpumask_var(&old_mask, GFP_NOWAIT);
+	cpumask_copy(old_mask, &current->cpus_allowed);
 	set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
 	
 	if (smp_ops && smp_ops->setup_cpu)
 		smp_ops->setup_cpu(boot_cpuid);
 
-	set_cpus_allowed_ptr(current, &old_mask);
+	set_cpus_allowed_ptr(current, old_mask);
+
+	free_cpumask_var(old_mask);
 
 	snapshot_timebases();
 
-- 
cgit v1.2.2


From b6decb707952c678d110699abb5ed86d45ca6927 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 26 Apr 2010 15:32:35 +0000
Subject: powerpc/cpumask: Convert fixup_irqs to new cpumask API

Use new cpumask_* functions, and dynamically allocate cpumask in fixup_irqs.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/irq.c | 17 ++++++++++-------
 arch/powerpc/kernel/smp.c |  4 ++--
 2 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 64f6f2031c22..250ee2ebf288 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -290,30 +290,33 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-void fixup_irqs(cpumask_t map)
+void fixup_irqs(const struct cpumask *map)
 {
 	struct irq_desc *desc;
 	unsigned int irq;
 	static int warned;
+	cpumask_var_t mask;
 
-	for_each_irq(irq) {
-		cpumask_t mask;
+	alloc_cpumask_var(&mask, GFP_KERNEL);
 
+	for_each_irq(irq) {
 		desc = irq_to_desc(irq);
 		if (desc && desc->status & IRQ_PER_CPU)
 			continue;
 
-		cpumask_and(&mask, desc->affinity, &map);
-		if (any_online_cpu(mask) == NR_CPUS) {
+		cpumask_and(mask, desc->affinity, map);
+		if (cpumask_any(mask) >= nr_cpu_ids) {
 			printk("Breaking affinity for irq %i\n", irq);
-			mask = map;
+			cpumask_copy(mask, map);
 		}
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, &mask);
+			desc->chip->set_affinity(irq, mask);
 		else if (desc->action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
 
+	free_cpumask_var(mask);
+
 	local_irq_enable();
 	mdelay(1);
 	local_irq_disable();
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 17523a0abf66..62e82c25c583 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -313,7 +313,7 @@ int generic_cpu_disable(void)
 	set_cpu_online(cpu, false);
 #ifdef CONFIG_PPC64
 	vdso_data->processorCount--;
-	fixup_irqs(cpu_online_map);
+	fixup_irqs(cpu_online_mask);
 #endif
 	return 0;
 }
@@ -333,7 +333,7 @@ int generic_cpu_enable(unsigned int cpu)
 		cpu_relax();
 
 #ifdef CONFIG_PPC64
-	fixup_irqs(cpu_online_map);
+	fixup_irqs(cpu_online_mask);
 	/* counter the irq disable in fixup_irqs */
 	local_irq_enable();
 #endif
-- 
cgit v1.2.2


From 2c2df038450cbf628426183c9efffc17cfea3406 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 26 Apr 2010 15:32:39 +0000
Subject: powerpc/cpumask: Refactor /proc/cpuinfo code

This separates the per cpu output from the summary output at the end of the
file, making it easier to convert to the new cpumask API in a subsequent
patch.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/setup-common.c | 62 ++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 29 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 48f0a008b20b..58699a43eaa2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -161,6 +161,38 @@ extern u32 cpu_temp_both(unsigned long cpu);
 DEFINE_PER_CPU(unsigned int, cpu_pvr);
 #endif
 
+static void show_cpuinfo_summary(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = NULL;
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC32)
+	unsigned long bogosum = 0;
+	int i;
+	for_each_online_cpu(i)
+		bogosum += loops_per_jiffy;
+	seq_printf(m, "total bogomips\t: %lu.%02lu\n",
+		   bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
+#endif /* CONFIG_SMP && CONFIG_PPC32 */
+	seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
+	if (ppc_md.name)
+		seq_printf(m, "platform\t: %s\n", ppc_md.name);
+	root = of_find_node_by_path("/");
+	if (root)
+		model = of_get_property(root, "model", NULL);
+	if (model)
+		seq_printf(m, "model\t\t: %s\n", model);
+	of_node_put(root);
+
+	if (ppc_md.show_cpuinfo != NULL)
+		ppc_md.show_cpuinfo(m);
+
+#ifdef CONFIG_PPC32
+	/* Display the amount of memory */
+	seq_printf(m, "Memory\t\t: %d MB\n",
+		   (unsigned int)(total_memory / (1024 * 1024)));
+#endif
+}
+
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	unsigned long cpu_id = (unsigned long)v - 1;
@@ -169,35 +201,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	unsigned short min;
 
 	if (cpu_id == NR_CPUS) {
-		struct device_node *root;
-		const char *model = NULL;
-#if defined(CONFIG_SMP) && defined(CONFIG_PPC32)
-		unsigned long bogosum = 0;
-		int i;
-		for_each_online_cpu(i)
-			bogosum += loops_per_jiffy;
-		seq_printf(m, "total bogomips\t: %lu.%02lu\n",
-			   bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
-#endif /* CONFIG_SMP && CONFIG_PPC32 */
-		seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
-		if (ppc_md.name)
-			seq_printf(m, "platform\t: %s\n", ppc_md.name);
-		root = of_find_node_by_path("/");
-		if (root)
-			model = of_get_property(root, "model", NULL);
-		if (model)
-			seq_printf(m, "model\t\t: %s\n", model);
-		of_node_put(root);
-
-		if (ppc_md.show_cpuinfo != NULL)
-			ppc_md.show_cpuinfo(m);
-
-#ifdef CONFIG_PPC32
-		/* Display the amount of memory */
-		seq_printf(m, "Memory\t\t: %d MB\n",
-			   (unsigned int)(total_memory / (1024 * 1024)));
-#endif
-
+		show_cpuinfo_summary(m);
 		return 0;
 	}
 
-- 
cgit v1.2.2


From e6532c63cc3dbefc79936fc9c9c68a151004fe46 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 26 Apr 2010 15:32:40 +0000
Subject: powerpc/cpumask: Convert /proc/cpuinfo to new cpumask API

Use new cpumask API in /proc/cpuinfo code.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/setup-common.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 58699a43eaa2..8dcec47a7b39 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -200,11 +200,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	unsigned short maj;
 	unsigned short min;
 
-	if (cpu_id == NR_CPUS) {
-		show_cpuinfo_summary(m);
-		return 0;
-	}
-
 	/* We only show online cpus: disable preempt (overzealous, I
 	 * knew) to prevent cpu going down. */
 	preempt_disable();
@@ -312,19 +307,28 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 #endif
 
 	preempt_enable();
+
+	/* If this is the last cpu, print the summary */
+	if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
+		show_cpuinfo_summary(m);
+
 	return 0;
 }
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	unsigned long i = *pos;
-
-	return i <= NR_CPUS ? (void *)(i + 1) : NULL;
+	if (*pos == 0)	/* just in case, cpu 0 is not the first */
+		*pos = cpumask_first(cpu_online_mask);
+	else
+		*pos = cpumask_next(*pos - 1, cpu_online_mask);
+	if ((*pos) < nr_cpu_ids)
+		return (void *)(unsigned long)(*pos + 1);
+	return NULL;
 }
 
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	++*pos;
+	(*pos)++;
 	return c_start(m, pos);
 }
 
-- 
cgit v1.2.2


From cc1ba8ea6dde3f049b2b365d8fdc13976aee25cb Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 26 Apr 2010 15:32:41 +0000
Subject: powerpc/cpumask: Dynamically allocate cpu_sibling_map and
 cpu_core_map cpumasks

Dynamically allocate cpu_sibling_map and cpu_core_map cpumasks.

We don't need to set_cpu_online() the boot cpu in smp_prepare_boot_cpu,
init/main.c does it for us.

We also postpone setting of the boot cpu in cpu_sibling_map and cpu_core_map
until when the memory allocator is available (smp_prepare_cpus), similar
to x86.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/smp.c | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 62e82c25c583..39babb1e2ce1 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -59,8 +59,8 @@
 
 struct thread_info *secondary_ti;
 
-DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
-DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
+DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
 
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
@@ -271,6 +271,16 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	smp_store_cpu_info(boot_cpuid);
 	cpu_callin_map[boot_cpuid] = 1;
 
+	for_each_possible_cpu(cpu) {
+		zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
+					GFP_KERNEL, cpu_to_node(cpu));
+		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
+					GFP_KERNEL, cpu_to_node(cpu));
+	}
+
+	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
+	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+
 	if (smp_ops)
 		if (smp_ops->probe)
 			max_cpus = smp_ops->probe();
@@ -289,10 +299,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 void __devinit smp_prepare_boot_cpu(void)
 {
 	BUG_ON(smp_processor_id() != boot_cpuid);
-
-	set_cpu_online(boot_cpuid, true);
-	cpu_set(boot_cpuid, per_cpu(cpu_sibling_map, boot_cpuid));
-	cpu_set(boot_cpuid, per_cpu(cpu_core_map, boot_cpuid));
 #ifdef CONFIG_PPC64
 	paca[boot_cpuid].__current = current;
 #endif
@@ -525,15 +531,15 @@ int __devinit start_secondary(void *unused)
 	for (i = 0; i < threads_per_core; i++) {
 		if (cpu_is_offline(base + i))
 			continue;
-		cpu_set(cpu, per_cpu(cpu_sibling_map, base + i));
-		cpu_set(base + i, per_cpu(cpu_sibling_map, cpu));
+		cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
+		cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
 
 		/* cpu_core_map should be a superset of
 		 * cpu_sibling_map even if we don't have cache
 		 * information, so update the former here, too.
 		 */
-		cpu_set(cpu, per_cpu(cpu_core_map, base +i));
-		cpu_set(base + i, per_cpu(cpu_core_map, cpu));
+		cpumask_set_cpu(cpu, cpu_core_mask(base + i));
+		cpumask_set_cpu(base + i, cpu_core_mask(cpu));
 	}
 	l2_cache = cpu_to_l2cache(cpu);
 	for_each_online_cpu(i) {
@@ -541,8 +547,8 @@ int __devinit start_secondary(void *unused)
 		if (!np)
 			continue;
 		if (np == l2_cache) {
-			cpu_set(cpu, per_cpu(cpu_core_map, i));
-			cpu_set(i, per_cpu(cpu_core_map, cpu));
+			cpumask_set_cpu(cpu, cpu_core_mask(i));
+			cpumask_set_cpu(i, cpu_core_mask(cpu));
 		}
 		of_node_put(np);
 	}
@@ -602,10 +608,10 @@ int __cpu_disable(void)
 	/* Update sibling maps */
 	base = cpu_first_thread_in_core(cpu);
 	for (i = 0; i < threads_per_core; i++) {
-		cpu_clear(cpu, per_cpu(cpu_sibling_map, base + i));
-		cpu_clear(base + i, per_cpu(cpu_sibling_map, cpu));
-		cpu_clear(cpu, per_cpu(cpu_core_map, base +i));
-		cpu_clear(base + i, per_cpu(cpu_core_map, cpu));
+		cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
+		cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
+		cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
+		cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
 	}
 
 	l2_cache = cpu_to_l2cache(cpu);
@@ -614,8 +620,8 @@ int __cpu_disable(void)
 		if (!np)
 			continue;
 		if (np == l2_cache) {
-			cpu_clear(cpu, per_cpu(cpu_core_map, i));
-			cpu_clear(i, per_cpu(cpu_core_map, cpu));
+			cpumask_clear_cpu(cpu, cpu_core_mask(i));
+			cpumask_clear_cpu(i, cpu_core_mask(cpu));
 		}
 		of_node_put(np);
 	}
-- 
cgit v1.2.2


From 828a69869ba266cabb486a6b59ea8643d56b33ce Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 26 Apr 2010 15:32:44 +0000
Subject: powerpc/cpumask: Update some comments

Since the *_map cpumask variants are deprecated, change the comments to
instead refer to *_mask.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/setup-common.c | 6 +++---
 arch/powerpc/kernel/smp.c          | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 8dcec47a7b39..5e4d852f640c 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -394,14 +394,14 @@ static void __init cpu_init_thread_core_maps(int tpc)
 
 /**
  * setup_cpu_maps - initialize the following cpu maps:
- *                  cpu_possible_map
- *                  cpu_present_map
+ *                  cpu_possible_mask
+ *                  cpu_present_mask
  *
  * Having the possible map set up early allows us to restrict allocations
  * of things like irqstacks to num_possible_cpus() rather than NR_CPUS.
  *
  * We do not initialize the online map here; cpus set their own bits in
- * cpu_online_map as they come up.
+ * cpu_online_mask as they come up.
  *
  * This function is valid only for Open Firmware systems.  finish_device_tree
  * must be called before using this.
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 39babb1e2ce1..bf366167d369 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -468,7 +468,7 @@ out:
 	return id;
 }
 
-/* Must be called when no change can occur to cpu_present_map,
+/* Must be called when no change can occur to cpu_present_mask,
  * i.e. during cpu online or offline.
  */
 static struct device_node *cpu_to_l2cache(int cpu)
-- 
cgit v1.2.2


From 8e6d5573af55435160d329f6ae3fe16a0abbdaec Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Sat, 8 May 2010 20:28:41 +1000
Subject: perf, powerpc: Implement group scheduling transactional APIs

[paulus@samba.org: Set cpuhw->event[i]->hw.config in
power_pmu_commit_txn.]

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100508102841.GA10650@brick.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_event.c | 129 +++++++++++++++++++++------------------
 1 file changed, 68 insertions(+), 61 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 08460a2e9f41..43b83c35cf54 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -35,6 +35,9 @@ struct cpu_hw_events {
 	u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
 	unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
 	unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
+
+	unsigned int group_flag;
+	int n_txn_start;
 };
 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
 
@@ -718,66 +721,6 @@ static int collect_events(struct perf_event *group, int max_count,
 	return n;
 }
 
-static void event_sched_in(struct perf_event *event)
-{
-	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = smp_processor_id();
-	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
-	if (is_software_event(event))
-		event->pmu->enable(event);
-}
-
-/*
- * Called to enable a whole group of events.
- * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
- * Assumes the caller has disabled interrupts and has
- * frozen the PMU with hw_perf_save_disable.
- */
-int hw_perf_group_sched_in(struct perf_event *group_leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx)
-{
-	struct cpu_hw_events *cpuhw;
-	long i, n, n0;
-	struct perf_event *sub;
-
-	if (!ppmu)
-		return 0;
-	cpuhw = &__get_cpu_var(cpu_hw_events);
-	n0 = cpuhw->n_events;
-	n = collect_events(group_leader, ppmu->n_counter - n0,
-			   &cpuhw->event[n0], &cpuhw->events[n0],
-			   &cpuhw->flags[n0]);
-	if (n < 0)
-		return -EAGAIN;
-	if (check_excludes(cpuhw->event, cpuhw->flags, n0, n))
-		return -EAGAIN;
-	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0);
-	if (i < 0)
-		return -EAGAIN;
-	cpuhw->n_events = n0 + n;
-	cpuhw->n_added += n;
-
-	/*
-	 * OK, this group can go on; update event states etc.,
-	 * and enable any software events
-	 */
-	for (i = n0; i < n0 + n; ++i)
-		cpuhw->event[i]->hw.config = cpuhw->events[i];
-	cpuctx->active_oncpu += n;
-	n = 1;
-	event_sched_in(group_leader);
-	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
-		if (sub->state != PERF_EVENT_STATE_OFF) {
-			event_sched_in(sub);
-			++n;
-		}
-	}
-	ctx->nr_active += n;
-
-	return 1;
-}
-
 /*
  * Add a event to the PMU.
  * If all events are not already frozen, then we disable and
@@ -805,12 +748,22 @@ static int power_pmu_enable(struct perf_event *event)
 	cpuhw->event[n0] = event;
 	cpuhw->events[n0] = event->hw.config;
 	cpuhw->flags[n0] = event->hw.event_base;
+
+	/*
+	 * If group events scheduling transaction was started,
+	 * skip the schedulability test here, it will be peformed
+	 * at commit time(->commit_txn) as a whole
+	 */
+	if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED)
+		goto nocheck;
+
 	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
 		goto out;
 	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
 		goto out;
-
 	event->hw.config = cpuhw->events[n0];
+
+nocheck:
 	++cpuhw->n_events;
 	++cpuhw->n_added;
 
@@ -896,11 +849,65 @@ static void power_pmu_unthrottle(struct perf_event *event)
 	local_irq_restore(flags);
 }
 
+/*
+ * Start group events scheduling transaction
+ * Set the flag to make pmu::enable() not perform the
+ * schedulability test, it will be performed at commit time
+ */
+void power_pmu_start_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+	cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuhw->n_txn_start = cpuhw->n_events;
+}
+
+/*
+ * Stop group events scheduling transaction
+ * Clear the flag and pmu::enable() will perform the
+ * schedulability test.
+ */
+void power_pmu_cancel_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+	cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+}
+
+/*
+ * Commit group events scheduling transaction
+ * Perform the group schedulability test as a whole
+ * Return 0 if success
+ */
+int power_pmu_commit_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw;
+	long i, n;
+
+	if (!ppmu)
+		return -EAGAIN;
+	cpuhw = &__get_cpu_var(cpu_hw_events);
+	n = cpuhw->n_events;
+	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
+		return -EAGAIN;
+	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
+	if (i < 0)
+		return -EAGAIN;
+
+	for (i = cpuhw->n_txn_start; i < n; ++i)
+		cpuhw->event[i]->hw.config = cpuhw->events[i];
+
+	return 0;
+}
+
 struct pmu power_pmu = {
 	.enable		= power_pmu_enable,
 	.disable	= power_pmu_disable,
 	.read		= power_pmu_read,
 	.unthrottle	= power_pmu_unthrottle,
+	.start_txn	= power_pmu_start_txn,
+	.cancel_txn	= power_pmu_cancel_txn,
+	.commit_txn	= power_pmu_commit_txn,
 };
 
 /*
-- 
cgit v1.2.2


From 0fe1ac48bef018bed896307cd12f6ca9b5e704ab Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 13 Apr 2010 20:46:04 +0000
Subject: powerpc/perf_event: Fix oops due to perf_event_do_pending call

Anton Blanchard found that large POWER systems would occasionally
crash in the exception exit path when profiling with perf_events.
The symptom was that an interrupt would occur late in the exit path
when the MSR[RI] (recoverable interrupt) bit was clear.  Interrupts
should be hard-disabled at this point but they were enabled.  Because
the interrupt was not recoverable the system panicked.

The reason is that the exception exit path was calling
perf_event_do_pending after hard-disabling interrupts, and
perf_event_do_pending will re-enable interrupts.

The simplest and cleanest fix for this is to use the same mechanism
that 32-bit powerpc does, namely to cause a self-IPI by setting the
decrementer to 1.  This means we can remove the tests in the exception
exit path and raw_local_irq_restore.

This also makes sure that the call to perf_event_do_pending from
timer_interrupt() happens within irq_enter/irq_exit.  (Note that
calling perf_event_do_pending from timer_interrupt does not mean that
there is a possible 1/HZ latency; setting the decrementer to 1 ensures
that the timer interrupt will happen immediately, i.e. within one
timebase tick, which is a few nanoseconds or 10s of nanoseconds.)

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: stable@kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/asm-offsets.c |  1 -
 arch/powerpc/kernel/entry_64.S    |  9 ------
 arch/powerpc/kernel/irq.c         |  6 ----
 arch/powerpc/kernel/time.c        | 60 +++++++++++++++++++++++++++++++--------
 4 files changed, 48 insertions(+), 28 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 957ceb7059c5..c09138d150d4 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -133,7 +133,6 @@ int main(void)
 	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
 	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
 	DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
-	DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
 #ifdef CONFIG_PPC_MM_SLICES
 	DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 07109d843787..42e9d908914a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
 	TRACE_AND_RESTORE_IRQ(r5);
 
-#ifdef CONFIG_PERF_EVENTS
-	/* check paca->perf_event_pending if we're enabling ints */
-	lbz	r3,PACAPERFPEND(r13)
-	and.	r3,r3,r5
-	beq	27f
-	bl	.perf_event_do_pending
-27:
-#endif /* CONFIG_PERF_EVENTS */
-
 	/* extract EE bit and use it to restore paca->hard_enabled */
 	ld	r3,_MSR(r1)
 	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 64f6f2031c22..066bd31551d5 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,7 +53,6 @@
 #include <linux/bootmem.h>
 #include <linux/pci.h>
 #include <linux/debugfs.h>
-#include <linux/perf_event.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -145,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en)
 	}
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
-	if (test_perf_event_pending()) {
-		clear_perf_event_pending();
-		perf_event_do_pending();
-	}
-
 	/*
 	 * if (get_paca()->hard_enabled) return;
 	 * But again we need to take care that gcc gets hard_enabled directly
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 1b16b9a3e49a..0441bbdadbd1 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void)
 }
 #endif /* CONFIG_PPC_ISERIES */
 
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
-DEFINE_PER_CPU(u8, perf_event_pending);
+#ifdef CONFIG_PERF_EVENTS
 
-void set_perf_event_pending(void)
+/*
+ * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
+ */
+#ifdef CONFIG_PPC64
+static inline unsigned long test_perf_event_pending(void)
 {
-	get_cpu_var(perf_event_pending) = 1;
-	set_dec(1);
-	put_cpu_var(perf_event_pending);
+	unsigned long x;
+
+	asm volatile("lbz %0,%1(13)"
+		: "=r" (x)
+		: "i" (offsetof(struct paca_struct, perf_event_pending)));
+	return x;
 }
 
+static inline void set_perf_event_pending_flag(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (1),
+		"i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+static inline void clear_perf_event_pending(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (0),
+		"i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+#else /* 32-bit */
+
+DEFINE_PER_CPU(u8, perf_event_pending);
+
+#define set_perf_event_pending_flag()	__get_cpu_var(perf_event_pending) = 1
 #define test_perf_event_pending()	__get_cpu_var(perf_event_pending)
 #define clear_perf_event_pending()	__get_cpu_var(perf_event_pending) = 0
 
-#else  /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* 32 vs 64 bit */
+
+void set_perf_event_pending(void)
+{
+	preempt_disable();
+	set_perf_event_pending_flag();
+	set_dec(1);
+	preempt_enable();
+}
+
+#else  /* CONFIG_PERF_EVENTS */
 
 #define test_perf_event_pending()	0
 #define clear_perf_event_pending()
 
-#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* CONFIG_PERF_EVENTS */
 
 /*
  * For iSeries shared processors, we have to let the hypervisor
@@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs)
 	set_dec(DECREMENTER_MAX);
 
 #ifdef CONFIG_PPC32
-	if (test_perf_event_pending()) {
-		clear_perf_event_pending();
-		perf_event_do_pending();
-	}
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
 		do_IRQ(regs);
 #endif
@@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs)
 
 	calculate_steal_time();
 
+	if (test_perf_event_pending()) {
+		clear_perf_event_pending();
+		perf_event_do_pending();
+	}
+
 #ifdef CONFIG_PPC_ISERIES
 	if (firmware_has_feature(FW_FEATURE_ISERIES))
 		get_lppaca()->int_dword.fields.decr_int = 0;
-- 
cgit v1.2.2


From 306d071f179c28c97688cb91c8585fd5ab840a9e Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Thu, 1 Apr 2010 15:33:22 +0200
Subject: KVM: PPC: Don't export Book3S symbols on BookE

Book3S knows how to convert floats to doubles and vice versa. BookE doesn't.
So let's make sure we don't export them on BookE.

This fixes a link error on BookE with CONFIG_KVM=y.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kernel/ppc_ksyms.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 58fdb3a784de..bc9f39d2598b 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -101,8 +101,10 @@ EXPORT_SYMBOL(pci_dram_offset);
 EXPORT_SYMBOL(start_thread);
 EXPORT_SYMBOL(kernel_thread);
 
+#ifndef CONFIG_BOOKE
 EXPORT_SYMBOL_GPL(cvt_df);
 EXPORT_SYMBOL_GPL(cvt_fd);
+#endif
 EXPORT_SYMBOL(giveup_fpu);
 #ifdef CONFIG_ALTIVEC
 EXPORT_SYMBOL(giveup_altivec);
-- 
cgit v1.2.2


From 2191d657c9eaa4c444c33e014199ed9de1ac339d Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 16 Apr 2010 00:11:32 +0200
Subject: KVM: PPC: Name generic 64-bit code generic

We have quite some code that can be used by Book3S_32 and Book3S_64 alike,
so let's call it "Book3S" instead of "Book3S_64", so we can later on
use it from the 32 bit port too.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kernel/head_64.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index bed9a29ee383..844a44b64472 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -37,7 +37,7 @@
 #include <asm/firmware.h>
 #include <asm/page_64.h>
 #include <asm/irqflags.h>
-#include <asm/kvm_book3s_64_asm.h>
+#include <asm/kvm_book3s_asm.h>
 
 /* The physical memory is layed out such that the secondary processor
  * spin code sits at 0x0000...0x00ff. On server, the vectors follow
@@ -169,7 +169,7 @@ exception_marker:
 /* KVM trampoline code needs to be close to the interrupt handlers */
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#include "../kvm/book3s_64_rmhandlers.S"
+#include "../kvm/book3s_rmhandlers.S"
 #endif
 
 _GLOBAL(generic_secondary_thread_init)
-- 
cgit v1.2.2


From 00c3a37ca332f54f2187720e51f7c0e18e91d7c9 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 16 Apr 2010 00:11:42 +0200
Subject: KVM: PPC: Use CONFIG_PPC_BOOK3S define

Upstream recently added a new name for PPC64: Book3S_64.

So instead of using CONFIG_PPC64 we should use CONFIG_PPC_BOOK3S consotently.
That makes understanding the code easier (I hope).

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kernel/asm-offsets.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 957ceb7059c5..57a8c49c8830 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -426,8 +426,8 @@ int main(void)
 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
 
-	/* book3s_64 */
-#ifdef CONFIG_PPC64
+	/* book3s */
+#ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
 	DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
 	DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2));
@@ -442,7 +442,7 @@ int main(void)
 #else
 	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
 	DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
-#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_PPC_BOOK3S */
 #endif
 #ifdef CONFIG_44x
 	DEFINE(PGD_T_LOG2, PGD_T_LOG2);
-- 
cgit v1.2.2


From 0604675fe17f68741730cebe74422605bb79d972 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 16 Apr 2010 00:11:44 +0200
Subject: KVM: PPC: Use now shadowed vcpu fields

The shadow vcpu now contains some fields we don't use from the vcpu anymore.
Access to them happens using inline functions that happily use the shadow
vcpu fields.

So let's now ifdef them out to booke only and add asm-offsets.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kernel/asm-offsets.c | 91 +++++++++++++++++++++++----------------
 1 file changed, 54 insertions(+), 37 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 57a8c49c8830..e8003ff81eef 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -50,6 +50,9 @@
 #endif
 #ifdef CONFIG_KVM
 #include <linux/kvm_host.h>
+#ifndef CONFIG_BOOKE
+#include <asm/kvm_book3s.h>
+#endif
 #endif
 
 #ifdef CONFIG_PPC32
@@ -191,33 +194,9 @@ int main(void)
 	DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
 	DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-	DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest));
-	DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb));
-	DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max));
-	DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr));
-	DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer));
-	DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0]));
-	DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1]));
-	DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2]));
-	DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3]));
-	DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4]));
-	DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5]));
-	DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6]));
-	DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7]));
-	DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8]));
-	DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9]));
-	DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10]));
-	DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11]));
-	DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12]));
-	DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13]));
-	DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1));
-	DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2));
-	DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct,
-					    shadow_vcpu.vmhandler));
-	DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct,
-					   shadow_vcpu.scratch0));
-	DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct,
-					   shadow_vcpu.scratch1));
+	DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
+	DEFINE(SVCPU_SLB, offsetof(struct kvmppc_book3s_shadow_vcpu, slb));
+	DEFINE(SVCPU_SLB_MAX, offsetof(struct kvmppc_book3s_shadow_vcpu, slb_max));
 #endif
 #endif /* CONFIG_PPC64 */
 
@@ -412,9 +391,6 @@ int main(void)
 	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
-	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
-	DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
-	DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
 	DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
 	DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
 	DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
@@ -422,26 +398,67 @@ int main(void)
 	DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
 	DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
 
-	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
-	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
-	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
-
 	/* book3s */
 #ifdef CONFIG_PPC_BOOK3S
-	DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
 	DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
-	DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2));
 	DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
 	DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
-	DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
 	DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
 	DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
 	DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
 	DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
 	DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
+	DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
+			   offsetof(struct kvmppc_vcpu_book3s, vcpu));
+	DEFINE(SVCPU_CR, offsetof(struct kvmppc_book3s_shadow_vcpu, cr));
+	DEFINE(SVCPU_XER, offsetof(struct kvmppc_book3s_shadow_vcpu, xer));
+	DEFINE(SVCPU_CTR, offsetof(struct kvmppc_book3s_shadow_vcpu, ctr));
+	DEFINE(SVCPU_LR, offsetof(struct kvmppc_book3s_shadow_vcpu, lr));
+	DEFINE(SVCPU_PC, offsetof(struct kvmppc_book3s_shadow_vcpu, pc));
+	DEFINE(SVCPU_R0, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[0]));
+	DEFINE(SVCPU_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[1]));
+	DEFINE(SVCPU_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[2]));
+	DEFINE(SVCPU_R3, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[3]));
+	DEFINE(SVCPU_R4, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[4]));
+	DEFINE(SVCPU_R5, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[5]));
+	DEFINE(SVCPU_R6, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[6]));
+	DEFINE(SVCPU_R7, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[7]));
+	DEFINE(SVCPU_R8, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[8]));
+	DEFINE(SVCPU_R9, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[9]));
+	DEFINE(SVCPU_R10, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[10]));
+	DEFINE(SVCPU_R11, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[11]));
+	DEFINE(SVCPU_R12, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[12]));
+	DEFINE(SVCPU_R13, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[13]));
+	DEFINE(SVCPU_HOST_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r1));
+	DEFINE(SVCPU_HOST_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r2));
+	DEFINE(SVCPU_VMHANDLER, offsetof(struct kvmppc_book3s_shadow_vcpu,
+					 vmhandler));
+	DEFINE(SVCPU_SCRATCH0, offsetof(struct kvmppc_book3s_shadow_vcpu,
+					scratch0));
+	DEFINE(SVCPU_SCRATCH1, offsetof(struct kvmppc_book3s_shadow_vcpu,
+					scratch1));
+	DEFINE(SVCPU_IN_GUEST, offsetof(struct kvmppc_book3s_shadow_vcpu,
+					in_guest));
+	DEFINE(SVCPU_FAULT_DSISR, offsetof(struct kvmppc_book3s_shadow_vcpu,
+					   fault_dsisr));
+	DEFINE(SVCPU_FAULT_DAR, offsetof(struct kvmppc_book3s_shadow_vcpu,
+					 fault_dar));
+	DEFINE(SVCPU_LAST_INST, offsetof(struct kvmppc_book3s_shadow_vcpu,
+					 last_inst));
+	DEFINE(SVCPU_SHADOW_SRR1, offsetof(struct kvmppc_book3s_shadow_vcpu,
+					   shadow_srr1));
+#ifdef CONFIG_PPC_BOOK3S_32
+	DEFINE(SVCPU_SR, offsetof(struct kvmppc_book3s_shadow_vcpu, sr));
+#endif
 #else
 	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
 	DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
+	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
+	DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
+	DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
+	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
+	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
+	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
 #endif /* CONFIG_PPC_BOOK3S */
 #endif
 #ifdef CONFIG_44x
-- 
cgit v1.2.2


From 97e492558f423d99c51eb934506b7a3d7c64613b Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 16 Apr 2010 00:11:51 +0200
Subject: KVM: PPC: Add SVCPU to Book3S_32

We need to keep the pointer to the shadow vcpu somewhere accessible from
within really early interrupt code. The best fit I found was the thread
struct, as that resides in an SPRG.

So let's put a pointer to the shadow vcpu in the thread struct and add
an asm-offset so we can find it.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kernel/asm-offsets.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index e8003ff81eef..1804c2cd1dfd 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -108,6 +108,9 @@ int main(void)
 	DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
 #endif /* CONFIG_SPE */
 #endif /* CONFIG_PPC64 */
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+	DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
+#endif
 
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
-- 
cgit v1.2.2


From be85669886776faa496b026b344691c325727731 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 16 Apr 2010 00:11:54 +0200
Subject: KVM: PPC: Export MMU variables

Our shadow MMU code needs to know where the HTAB is located and how
big it is. So we need some variables from the kernel exported to
module space if KVM is built as a module.

CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kernel/ppc_ksyms.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index bc9f39d2598b..2b7c43f95bb2 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -178,6 +178,11 @@ EXPORT_SYMBOL(switch_mmu_context);
 extern long mol_trampoline;
 EXPORT_SYMBOL(mol_trampoline); /* For MOL */
 EXPORT_SYMBOL(flush_hash_pages); /* For MOL */
+
+extern struct hash_pte *Hash;
+extern unsigned long _SDR1;
+EXPORT_SYMBOL_GPL(Hash); /* For KVM */
+EXPORT_SYMBOL_GPL(_SDR1); /* For KVM */
 #ifdef CONFIG_SMP
 extern int mmu_hash_lock;
 EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */
-- 
cgit v1.2.2


From 218d169c4c856eee7df56ea0fb8cbb32167e63d3 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 16 Apr 2010 00:11:55 +0200
Subject: PPC: Export SWITCH_FRAME_SIZE

We need the SWITCH_FRAME_SIZE define on Book3S_32 now too.
So let's export it unconditionally.

CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kernel/asm-offsets.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 1804c2cd1dfd..2716c51906d6 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -210,8 +210,8 @@ int main(void)
 	/* Interrupt register frame */
 	DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD);
 	DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
-#ifdef CONFIG_PPC64
 	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
+#ifdef CONFIG_PPC64
 	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
 	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
 	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
-- 
cgit v1.2.2


From dd84c21748d9280f210565429b1bdb9b6353e8d2 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 16 Apr 2010 00:11:57 +0200
Subject: KVM: PPC: Add KVM intercept handlers

When an interrupt occurs we don't know yet if we're in guest context or
in host context. When in guest context, KVM needs to handle it.

So let's pull the same trick we did on Book3S_64: Just add a macro to
determine if we're in guest context or not and if so jump on to KVM code.

CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kernel/head_32.S | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index e025e89fe93e..98c4b29a56f4 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -33,6 +33,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
 #include <asm/bug.h>
+#include <asm/kvm_book3s_asm.h>
 
 /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
 #define LOAD_BAT(n, reg, RA, RB)	\
@@ -303,6 +304,7 @@ __secondary_hold_acknowledge:
  */
 #define EXCEPTION(n, label, hdlr, xfer)		\
 	. = n;					\
+	DO_KVM n;				\
 label:						\
 	EXCEPTION_PROLOG;			\
 	addi	r3,r1,STACK_FRAME_OVERHEAD;	\
@@ -358,6 +360,7 @@ i##n:								\
  *	-- paulus.
  */
 	. = 0x200
+	DO_KVM  0x200
 	mtspr	SPRN_SPRG_SCRATCH0,r10
 	mtspr	SPRN_SPRG_SCRATCH1,r11
 	mfcr	r10
@@ -381,6 +384,7 @@ i##n:								\
 
 /* Data access exception. */
 	. = 0x300
+	DO_KVM  0x300
 DataAccess:
 	EXCEPTION_PROLOG
 	mfspr	r10,SPRN_DSISR
@@ -397,6 +401,7 @@ DataAccess:
 
 /* Instruction access exception. */
 	. = 0x400
+	DO_KVM  0x400
 InstructionAccess:
 	EXCEPTION_PROLOG
 	andis.	r0,r9,0x4000		/* no pte found? */
@@ -413,6 +418,7 @@ InstructionAccess:
 
 /* Alignment exception */
 	. = 0x600
+	DO_KVM  0x600
 Alignment:
 	EXCEPTION_PROLOG
 	mfspr	r4,SPRN_DAR
@@ -427,6 +433,7 @@ Alignment:
 
 /* Floating-point unavailable */
 	. = 0x800
+	DO_KVM  0x800
 FPUnavailable:
 BEGIN_FTR_SECTION
 /*
@@ -450,6 +457,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
 
 /* System call */
 	. = 0xc00
+	DO_KVM  0xc00
 SystemCall:
 	EXCEPTION_PROLOG
 	EXC_XFER_EE_LITE(0xc00, DoSyscall)
@@ -467,9 +475,11 @@ SystemCall:
  * by executing an altivec instruction.
  */
 	. = 0xf00
+	DO_KVM  0xf00
 	b	PerformanceMonitor
 
 	. = 0xf20
+	DO_KVM  0xf20
 	b	AltiVecUnavailable
 
 /*
@@ -882,6 +892,10 @@ __secondary_start:
 	RFI
 #endif /* CONFIG_SMP */
 
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+#include "../kvm/book3s_rmhandlers.S"
+#endif
+
 /*
  * Those generic dummy functions are kept for CPUs not
  * included in CONFIG_6xx
-- 
cgit v1.2.2


From 251585b5d02152973dbc24c803ca322bb977d4a2 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 20 Apr 2010 02:49:53 +0200
Subject: KVM: PPC: Find HTAB ourselves

For KVM we need to find the location of the HTAB. We can either rely
on internal data structures of the kernel or ask the hardware.

Ben issued complaints about the internal data structure method, so
let's switch it to our own inquiry of the HTAB. Now we're fully
independend :-).

CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kernel/ppc_ksyms.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 2b7c43f95bb2..bc9f39d2598b 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -178,11 +178,6 @@ EXPORT_SYMBOL(switch_mmu_context);
 extern long mol_trampoline;
 EXPORT_SYMBOL(mol_trampoline); /* For MOL */
 EXPORT_SYMBOL(flush_hash_pages); /* For MOL */
-
-extern struct hash_pte *Hash;
-extern unsigned long _SDR1;
-EXPORT_SYMBOL_GPL(Hash); /* For KVM */
-EXPORT_SYMBOL_GPL(_SDR1); /* For KVM */
 #ifdef CONFIG_SMP
 extern int mmu_hash_lock;
 EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */
-- 
cgit v1.2.2


From 78e2e68a2b79f394b7cd61e07987a8a89af907f7 Mon Sep 17 00:00:00 2001
From: Li Yang <leoli@freescale.com>
Date: Fri, 7 May 2010 16:38:34 +0800
Subject: powerpc/fsl-booke: Fix InstructionTLBError execute permission check

In CONFIG_PTE_64BIT the PTE format has unique permission bits for user
and supervisor execute.  However on !CONFIG_PTE_64BIT we overload the
supervisor bit to imply user execute with _PAGE_USER set.  This allows
us to use the same permission check mask for user or supervisor code on
!CONFIG_PTE_64BIT.

However, on CONFIG_PTE_64BIT we map _PAGE_EXEC to _PAGE_BAP_UX so we
need a different permission mask based on the fault coming from a kernel
address or user space.

Without unique permission masks we see issues like the following with
modules:

Unable to handle kernel paging request for instruction fetch
Faulting instruction address: 0xf938d040
Oops: Kernel access of bad area, sig: 11 [#1]

Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Jin Qing <b24347@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/kernel/head_fsl_booke.S | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 725526547994..edd4a57fd29e 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -639,6 +639,13 @@ interrupt_base:
 	rlwinm	r12,r12,0,16,1
 	mtspr	SPRN_MAS1,r12
 
+	/* Make up the required permissions for kernel code */
+#ifdef CONFIG_PTE_64BIT
+	li	r13,_PAGE_PRESENT | _PAGE_BAP_SX
+	oris	r13,r13,_PAGE_ACCESSED@h
+#else
+	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#endif
 	b	4f
 
 	/* Get the PGD for the current thread */
@@ -646,15 +653,15 @@ interrupt_base:
 	mfspr	r11,SPRN_SPRG_THREAD
 	lwz	r11,PGDIR(r11)
 
-4:
-	/* Make up the required permissions */
+	/* Make up the required permissions for user code */
 #ifdef CONFIG_PTE_64BIT
-	li	r13,_PAGE_PRESENT | _PAGE_EXEC
+	li	r13,_PAGE_PRESENT | _PAGE_BAP_UX
 	oris	r13,r13,_PAGE_ACCESSED@h
 #else
 	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
 #endif
 
+4:
 	FIND_PTE
 	andc.	r13,r13,r11		/* Check permission */
 
-- 
cgit v1.2.2


From 78f622377f7d31d988db350a43c5689dd5f31876 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Thu, 13 May 2010 14:38:21 -0500
Subject: powerpc/fsl-booke: Move loadcam_entry back to asm code to fix SMP
 ftrace

When we build with ftrace enabled its possible that loadcam_entry would
have used the stack pointer (even though the code doesn't need it).  We
call loadcam_entry in __secondary_start before the stack is setup.  To
ensure that loadcam_entry doesn't use the stack pointer the easiest
solution is to just have it in asm code.

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/kernel/asm-offsets.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 957ceb7059c5..0271b58ec31e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -448,6 +448,14 @@ int main(void)
 	DEFINE(PGD_T_LOG2, PGD_T_LOG2);
 	DEFINE(PTE_T_LOG2, PTE_T_LOG2);
 #endif
+#ifdef CONFIG_FSL_BOOKE
+	DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam));
+	DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0));
+	DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1));
+	DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2));
+	DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3));
+	DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7));
+#endif
 
 #ifdef CONFIG_KVM_EXIT_TIMING
 	DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
-- 
cgit v1.2.2


From dcc7871128e99458ca86186b7bc8bf27ff0c47b5 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:21 -0500
Subject: kgdb: core changes to support kdb

These are the minimum changes to the kgdb core in order to enable an
API to connect a new front end (kdb) to the debug core.

This patch introduces the dbg_kdb_mode variable controls where the
user level I/O is routed.  It will be routed to the gdbstub (kgdb) or
to the kdb front end which is a simple shell available over the kgdboc
connection.

You can switch back and forth between kdb or the gdb stub mode of
operation dynamically.  From gdb stub mode you can blindly type
"$3#33", or from the kdb mode you can enter "kgdb" to switch to the
gdb stub.

The logic in the debug core depends on kdb to look for the typical gdb
connection sequences and return immediately with KGDB_PASS_EVENT if a
gdb serial command sequence is detected.  That should allow a
reasonably seamless transition between kdb -> gdb without leaving the
kernel exception state.  The two gdb serial queries that kdb is
responsible for detecting are the "?" and "qSupported" packets.

CC: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Martin Hicks <mort@sgi.com>
---
 arch/powerpc/kernel/kgdb.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 41bada0298c8..c81e3de1306e 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -309,6 +309,11 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 	       (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
 }
 
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	regs->nip = pc;
+}
+
 /*
  * This function does PowerPC specific procesing for interfacing to gdb.
  */
-- 
cgit v1.2.2


From ba797b28131b1f1367b662936ea370239d603cff Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:25 -0500
Subject: powerpc,kgdb: Introduce low level trap catching

The only way the debugger can handle a trap in inside rcu_lock,
notify_die, or atomic_notifier_call_chain without a recursive fault is
to allow the kernel debugger to handle the exception first in
program_check_exception().

The other change here is to make sure that kgdb_handle_exception() is
called with correct parameters when catching an oops, because kdb
needs to know if the entry was an oops, single step, or breakpoint
exception.

[benh@kernel.crashing.org: move debugger_bpt instead of #ifdef]

CC: Paul Mackerras <paulus@samba.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/kgdb.c  | 6 ++++--
 arch/powerpc/kernel/traps.c | 7 +++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index c81e3de1306e..82a7b228c81a 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -20,6 +20,7 @@
 #include <linux/smp.h>
 #include <linux/signal.h>
 #include <linux/ptrace.h>
+#include <linux/kdebug.h>
 #include <asm/current.h>
 #include <asm/processor.h>
 #include <asm/machdep.h>
@@ -115,7 +116,8 @@ void kgdb_roundup_cpus(unsigned long flags)
 /* KGDB functions to use existing PowerPC64 hooks. */
 static int kgdb_debugger(struct pt_regs *regs)
 {
-	return kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
+	return !kgdb_handle_exception(1, computeSignal(TRAP(regs)),
+				      DIE_OOPS, regs);
 }
 
 static int kgdb_handle_breakpoint(struct pt_regs *regs)
@@ -123,7 +125,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
 	if (user_mode(regs))
 		return 0;
 
-	if (kgdb_handle_exception(0, SIGTRAP, 0, regs) != 0)
+	if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0)
 		return 0;
 
 	if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 29d128eb6c43..b6859aade9c2 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -815,12 +815,15 @@ void __kprobes program_check_exception(struct pt_regs *regs)
 		return;
 	}
 	if (reason & REASON_TRAP) {
+		/* Debugger is first in line to stop recursive faults in
+		 * rcu_lock, notify_die, or atomic_notifier_call_chain */
+		if (debugger_bpt(regs))
+			return;
+
 		/* trap exception */
 		if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
 				== NOTIFY_STOP)
 			return;
-		if (debugger_bpt(regs))
-			return;
 
 		if (!(regs->msr & MSR_PR) &&  /* not user-mode */
 		    report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
-- 
cgit v1.2.2


From 7358650e9e9a81c854dc4582b4193eb5ea500bf6 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Wed, 19 May 2010 02:12:32 +0000
Subject: powerpc/rtasd: Don't start event scan if scan rate is zero

There appear to be Pegasos systems which have the rtas-event-scan
RTAS tokens, but on which the event scan always fails. They also
have an event-scan-rate property containing 0, which means call
event scan 0 times per minute.

So interpret a scan rate of 0 to mean don't scan at all. This fixes
the problem on the Pegasos machines and makes sense as well.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/rtasd.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index e907ca66f75a..638883e23e3a 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -490,6 +490,12 @@ static int __init rtas_init(void)
 		return -ENODEV;
 	}
 
+	if (!rtas_event_scan_rate) {
+		/* Broken firmware: take a rate of zero to mean don't scan */
+		printk(KERN_DEBUG "rtasd: scan rate is 0, not scanning\n");
+		return 0;
+	}
+
 	/* Make room for the sequence number */
 	rtas_error_log_max = rtas_get_error_log_max();
 	rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int);
-- 
cgit v1.2.2


From abb17f9c3a92c5acf30e749efdf0419b7f50a5b8 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Wed, 19 May 2010 02:56:29 +0000
Subject: powerpc: Use common cpu_die (fixes SMP+SUSPEND build)

Configuring a powerpc 32 bit kernel for both SMP and SUSPEND turns on
CPU_HOTPLUG to enable disable_nonboot_cpus to be called by the common
suspend code.  Previously the definition of cpu_die for ppc32 was in
the powermac platform code, causing it to be undefined if that platform
as not selected.

arch/powerpc/kernel/built-in.o: In function 'cpu_idle':
arch/powerpc/kernel/idle.c:98: undefined reference to 'cpu_die'

Move the code from setup_64 to smp.c and rename the power mac
versions to their specific names.

Note that this does not setup the cpu_die pointers in either
smp_ops (request a given cpu die) or ppc_md (make this cpu die),
for other platforms but there are generic versions in smp.c.

Reported-by: Matt Sealey <matt@genesi-usa.com>
Reported-by: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Anton Vorontsov <avorontsov@mvista.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/setup_64.c | 6 ------
 arch/powerpc/kernel/smp.c      | 6 ++++++
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 914389158a9b..cea66987a6ea 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -573,12 +573,6 @@ void ppc64_boot_msg(unsigned int src, const char *msg)
 	printk("[boot]%04x %s\n", src, msg);
 }
 
-void cpu_die(void)
-{
-	if (ppc_md.cpu_die)
-		ppc_md.cpu_die();
-}
-
 #ifdef CONFIG_SMP
 #define PCPU_DYN_SIZE		()
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index bf366167d369..5c196d1086d9 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -648,4 +648,10 @@ void cpu_hotplug_driver_unlock()
 {
 	mutex_unlock(&powerpc_cpu_hotplug_driver_mutex);
 }
+
+void cpu_die(void)
+{
+	if (ppc_md.cpu_die)
+		ppc_md.cpu_die();
+}
 #endif
-- 
cgit v1.2.2


From a1263c71448aa70afb6097fdedf93c3dff5a7a15 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Fri, 14 May 2010 12:04:41 +0000
Subject: powerpc/vio: Switch VIO Bus PM to use generic helpers

Switch to use the generic power management helpers.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/vio.c | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index b8e311d64ad5..9ce7b62dc3a4 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1381,29 +1381,6 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
 	return 0;
 }
 
-static int vio_pm_suspend(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm && pm->suspend)
-		return pm->suspend(dev);
-	return 0;
-}
-
-static int vio_pm_resume(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm && pm->resume)
-		return pm->resume(dev);
-	return 0;
-}
-
-const struct dev_pm_ops vio_dev_pm_ops = {
-	.suspend = vio_pm_suspend,
-	.resume = vio_pm_resume,
-};
-
 static struct bus_type vio_bus_type = {
 	.name = "vio",
 	.dev_attrs = vio_dev_attrs,
@@ -1411,7 +1388,7 @@ static struct bus_type vio_bus_type = {
 	.match = vio_bus_match,
 	.probe = vio_bus_probe,
 	.remove = vio_bus_remove,
-	.pm = &vio_dev_pm_ops,
+	.pm = GENERIC_SUBSYS_PM_OPS,
 };
 
 /**
-- 
cgit v1.2.2


From 5b339bdf164d8aee394609768f7e2e4415b0252a Mon Sep 17 00:00:00 2001
From: Sonny Rao <sonnyrao@us.ibm.com>
Date: Mon, 10 May 2010 15:13:41 +0000
Subject: powerpc/pci: Check devices status property when scanning OF tree

We ran into an issue where it looks like we're not properly ignoring a
pci device with a non-good status property when we walk the device tree
and instanciate the Linux side PCI devices.

However, the EEH init code does look for the property and disables EEH
on these devices. This leaves us in an inconsistent where we are poking
at a supposedly bad piece of hardware and RTAS will block our config
cycles because EEH isn't enabled anyway.

Signed-of-by: Sonny Rao <sonnyrao@linux.vnet.ibm.com>

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/pci_of_scan.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index cd11d5ca80df..6ddb795f83e8 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -310,6 +310,8 @@ static void __devinit __of_scan_bus(struct device_node *node,
 	/* Scan direct children */
 	for_each_child_of_node(node, child) {
 		pr_debug("  * %s\n", child->full_name);
+		if (!of_device_is_available(child))
+			continue;
 		reg = of_get_property(child, "reg", &reglen);
 		if (reg == NULL || reglen < 20)
 			continue;
-- 
cgit v1.2.2


From 426b6cb478e60352a463a0d1ec75c1c9fab30b13 Mon Sep 17 00:00:00 2001
From: Maxim Uvarov <muvarov@gmail.com>
Date: Tue, 11 May 2010 05:41:08 +0000
Subject: powerpc/crashdump: Do not fail on NULL pointer dereferencing

Signed-off-by: Maxim Uvarov <muvarov@gmail.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/crash.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 6f4613dd05ef..341d8af6f33e 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -375,6 +375,9 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	for_each_irq(i) {
 		struct irq_desc *desc = irq_to_desc(i);
 
+		if (!desc || !desc->chip || !desc->chip->eoi)
+			continue;
+
 		if (desc->status & IRQ_INPROGRESS)
 			desc->chip->eoi(i);
 
-- 
cgit v1.2.2


From 0644079410065567e3bb31fcb8e6441f2b7685a9 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 10 May 2010 16:25:51 +0000
Subject: powerpc/kdump: CPUs assume the context of the oopsing CPU

We wrap the crash_shutdown_handles[] calls with longjmp/setjmp, so if any
of them fault we can recover. The problem is we add a hook to the debugger
fault handler hook which calls longjmp unconditionally.

This first part of kdump is run before we marshall the other CPUs, so there
is a very good chance some CPU on the box is going to page fault. And when
it does it hits the longjmp code and assumes the context of the oopsing CPU.
The machine gets very confused when it has 10 CPUs all with the same stack,
all thinking they have the same CPU id. I get even more confused trying
to debug it.

The patch below adds crash_shutdown_cpu and uses it to specify which cpu is
in the protected region. Since it can only be -1 or the oopsing CPU, we don't
need to use memory barriers since it is only valid on the local CPU - no other
CPU will ever see a value that matches it's local CPU id.

Eventually we should switch the order and marshall all CPUs before doing the
crash_shutdown_handles[] calls, but that is a bigger fix.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/crash.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 341d8af6f33e..7ced58dacb12 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -347,10 +347,12 @@ int crash_shutdown_unregister(crash_shutdown_t handler)
 EXPORT_SYMBOL(crash_shutdown_unregister);
 
 static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
+static int crash_shutdown_cpu = -1;
 
 static int handle_fault(struct pt_regs *regs)
 {
-	longjmp(crash_shutdown_buf, 1);
+	if (crash_shutdown_cpu == smp_processor_id())
+		longjmp(crash_shutdown_buf, 1);
 	return 0;
 }
 
@@ -391,6 +393,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	 */
 	old_handler = __debugger_fault_handler;
 	__debugger_fault_handler = handle_fault;
+	crash_shutdown_cpu = smp_processor_id();
 	for (i = 0; crash_shutdown_handles[i]; i++) {
 		if (setjmp(crash_shutdown_buf) == 0) {
 			/*
@@ -404,6 +407,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 			asm volatile("sync; isync");
 		}
 	}
+	crash_shutdown_cpu = -1;
 	__debugger_fault_handler = old_handler;
 
 	/*
-- 
cgit v1.2.2


From 5d7a87217de48b234b3c8ff8a73059947d822e07 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 10 May 2010 16:27:38 +0000
Subject: powerpc/kdump: Use chip->shutdown to disable IRQs

I saw this in a kdump kernel:

IOMMU table initialized, virtual merging enabled
Interrupt 155954 (real) is invalid, disabling it.
Interrupt 155953 (real) is invalid, disabling it.

ie we took some spurious interrupts. default_machine_crash_shutdown tries
to disable all interrupt sources but uses chip->disable which maps to
the default action of:

static void default_disable(unsigned int irq)
{
}

If we use chip->shutdown, then we actually mask the IRQ:

static void default_shutdown(unsigned int irq)
{
        struct irq_desc *desc = irq_to_desc(irq);

        desc->chip->mask(irq);
        desc->status |= IRQ_MASKED;
}

Not sure why we don't implement a ->disable action for xics.c, or why
default_disable doesn't mask the interrupt.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/crash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 7ced58dacb12..cca7c8fafc1c 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -384,7 +384,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 			desc->chip->eoi(i);
 
 		if (!(desc->status & IRQ_DISABLED))
-			desc->chip->disable(i);
+			desc->chip->shutdown(i);
 	}
 
 	/*
-- 
cgit v1.2.2


From 095c7965f4dc870ed2b65143b1e2610de653416c Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 10 May 2010 18:59:18 +0000
Subject: powerpc: Use more accurate limit for first segment memory allocations

Author: Milton Miller <miltonm@bga.com>

On large machines we are running out of room below 256MB. In some cases we
only need to ensure the allocation is in the first segment, which may be
256MB or 1TB.

Add slb0_limit and use it to specify the upper limit for the irqstack and
emergency stacks.

On a large ppc64 box, this fixes a panic at boot when the crashkernel=
option is specified (previously we would run out of memory below 256MB).

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/setup_64.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index cea66987a6ea..f3fb5a79de52 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -424,9 +424,18 @@ void __init setup_system(void)
 	DBG(" <- setup_system()\n");
 }
 
+static u64 slb0_limit(void)
+{
+	if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) {
+		return 1UL << SID_SHIFT_1T;
+	}
+	return 1UL << SID_SHIFT;
+}
+
 #ifdef CONFIG_IRQSTACKS
 static void __init irqstack_early_init(void)
 {
+	u64 limit = slb0_limit();
 	unsigned int i;
 
 	/*
@@ -436,10 +445,10 @@ static void __init irqstack_early_init(void)
 	for_each_possible_cpu(i) {
 		softirq_ctx[i] = (struct thread_info *)
 			__va(lmb_alloc_base(THREAD_SIZE,
-					    THREAD_SIZE, 0x10000000));
+					    THREAD_SIZE, limit));
 		hardirq_ctx[i] = (struct thread_info *)
 			__va(lmb_alloc_base(THREAD_SIZE,
-					    THREAD_SIZE, 0x10000000));
+					    THREAD_SIZE, limit));
 	}
 }
 #else
@@ -470,7 +479,7 @@ static void __init exc_lvl_early_init(void)
  */
 static void __init emergency_stack_init(void)
 {
-	unsigned long limit;
+	u64 limit;
 	unsigned int i;
 
 	/*
@@ -482,7 +491,7 @@ static void __init emergency_stack_init(void)
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
 	 */
-	limit = min(0x10000000ULL, lmb.rmo_size);
+	limit = min(slb0_limit(), lmb.rmo_size);
 
 	for_each_possible_cpu(i) {
 		unsigned long sp;
-- 
cgit v1.2.2


From 1fc711f7ffb01089efc58042cfdbac8573d1b59a Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Thu, 13 May 2010 19:40:11 +0000
Subject: powerpc/kexec: Fix race in kexec shutdown

In kexec_prepare_cpus, the primary CPU IPIs the secondary CPUs to
kexec_smp_down().  kexec_smp_down() calls kexec_smp_wait() which sets
the hw_cpu_id() to -1.  The primary does this while leaving IRQs on
which means the primary can take a timer interrupt which can lead to
the IPIing one of the secondary CPUs (say, for a scheduler re-balance)
but since the secondary CPU now has a hw_cpu_id = -1, we IPI CPU
-1... Kaboom!

We are hitting this case regularly on POWER7 machines.

There is also a second race, where the primary will tear down the MMU
mappings before knowing the secondaries have entered real mode.

Also, the secondaries are clearing out any pending IPIs before
guaranteeing that no more will be received.

This changes kexec_prepare_cpus() so that we turn off IRQs in the
primary CPU much earlier.  It adds a paca flag to say that the
secondaries have entered the kexec_smp_down() IPI and turned off IRQs,
rather than overloading hw_cpu_id with -1.  This new paca flag is
again used to in indicate when the secondaries has entered real mode.

It also ensures that all CPUs have their IRQs off before we clear out
any pending IPI requests (in kexec_cpu_down()) to ensure there are no
trailing IPIs left unacknowledged.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/asm-offsets.c      |  1 +
 arch/powerpc/kernel/machine_kexec_64.c | 48 ++++++++++++++++++++++++----------
 arch/powerpc/kernel/misc_64.S          |  8 +++---
 arch/powerpc/kernel/paca.c             |  2 ++
 4 files changed, 42 insertions(+), 17 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 0271b58ec31e..1b784ff92d9d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -184,6 +184,7 @@ int main(void)
 #endif /* CONFIG_PPC_STD_MMU_64 */
 	DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
 	DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
+	DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
 	DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
 	DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr));
 	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 040bd1de8d99..26f9900f773c 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -155,33 +155,38 @@ void kexec_copy_flush(struct kimage *image)
 
 #ifdef CONFIG_SMP
 
-/* FIXME: we should schedule this function to be called on all cpus based
- * on calling the interrupts, but we would like to call it off irq level
- * so that the interrupt controller is clean.
- */
+static int kexec_all_irq_disabled = 0;
+
 static void kexec_smp_down(void *arg)
 {
+	local_irq_disable();
+	mb(); /* make sure our irqs are disabled before we say they are */
+	get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+	while(kexec_all_irq_disabled == 0)
+		cpu_relax();
+	mb(); /* make sure all irqs are disabled before this */
+	/*
+	 * Now every CPU has IRQs off, we can clear out any pending
+	 * IPIs and be sure that no more will come in after this.
+	 */
 	if (ppc_md.kexec_cpu_down)
 		ppc_md.kexec_cpu_down(0, 1);
 
-	local_irq_disable();
 	kexec_smp_wait();
 	/* NOTREACHED */
 }
 
-static void kexec_prepare_cpus(void)
+static void kexec_prepare_cpus_wait(int wait_state)
 {
 	int my_cpu, i, notified=-1;
 
-	smp_call_function(kexec_smp_down, NULL, /* wait */0);
 	my_cpu = get_cpu();
-
-	/* check the others cpus are now down (via paca hw cpu id == -1) */
+	/* Make sure each CPU has atleast made it to the state we need */
 	for (i=0; i < NR_CPUS; i++) {
 		if (i == my_cpu)
 			continue;
 
-		while (paca[i].hw_cpu_id != -1) {
+		while (paca[i].kexec_state < wait_state) {
 			barrier();
 			if (!cpu_possible(i)) {
 				printk("kexec: cpu %d hw_cpu_id %d is not"
@@ -201,20 +206,35 @@ static void kexec_prepare_cpus(void)
 			}
 			if (i != notified) {
 				printk( "kexec: waiting for cpu %d (physical"
-						" %d) to go down\n",
-						i, paca[i].hw_cpu_id);
+						" %d) to enter %i state\n",
+					i, paca[i].hw_cpu_id, wait_state);
 				notified = i;
 			}
 		}
 	}
+	mb();
+}
+
+static void kexec_prepare_cpus(void)
+{
+
+	smp_call_function(kexec_smp_down, NULL, /* wait */0);
+	local_irq_disable();
+	mb(); /* make sure IRQs are disabled before we say they are */
+	get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+
+	kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
+	/* we are sure every CPU has IRQs off at this point */
+	kexec_all_irq_disabled = 1;
 
 	/* after we tell the others to go down */
 	if (ppc_md.kexec_cpu_down)
 		ppc_md.kexec_cpu_down(0, 0);
 
-	put_cpu();
+	/* Before removing MMU mapings make sure all CPUs have entered real mode */
+	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
 
-	local_irq_disable();
+	put_cpu();
 }
 
 #else /* ! SMP */
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index a5cf9c1356a6..a2b18dffa03e 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -24,6 +24,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/cputable.h>
 #include <asm/thread_info.h>
+#include <asm/kexec.h>
 
 	.text
 
@@ -471,6 +472,10 @@ _GLOBAL(kexec_wait)
 1:	mflr	r5
 	addi	r5,r5,kexec_flag-1b
 
+	li	r4,KEXEC_STATE_REAL_MODE
+	stb	r4,PACAKEXECSTATE(r13)
+	SYNC
+
 99:	HMT_LOW
 #ifdef CONFIG_KEXEC		/* use no memory without kexec */
 	lwz	r4,0(r5)
@@ -494,14 +499,11 @@ kexec_flag:
  * note: this is a terminal routine, it does not save lr
  *
  * get phys id from paca
- * set paca id to -1 to say we got here
  * switch to real mode
  * join other cpus in kexec_wait(phys_id)
  */
 _GLOBAL(kexec_smp_wait)
 	lhz	r3,PACAHWCPUID(r13)
-	li	r4,-1
-	sth	r4,PACAHWCPUID(r13)	/* let others know we left */
 	bl	real_mode
 	b	.kexec_wait
 
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0c40c6f476fe..f88acf0218db 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -18,6 +18,7 @@
 #include <asm/pgtable.h>
 #include <asm/iseries/lpar_map.h>
 #include <asm/iseries/hv_types.h>
+#include <asm/kexec.h>
 
 /* This symbol is provided by the linker - let it fill in the paca
  * field correctly */
@@ -97,6 +98,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 	new_paca->kernelbase = (unsigned long) _stext;
 	new_paca->kernel_msr = MSR_KERNEL;
 	new_paca->hw_cpu_id = 0xffff;
+	new_paca->kexec_state = KEXEC_STATE_NONE;
 	new_paca->__current = &init_task;
 #ifdef CONFIG_PPC_STD_MMU_64
 	new_paca->slb_shadow_ptr = &slb_shadow[cpu];
-- 
cgit v1.2.2


From 60adec6226bbcf061d4c2d10944fced209d1847d Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Thu, 13 May 2010 19:40:11 +0000
Subject: powerpc/kdump: Fix race in kdump shutdown

When we are crashing, the crashing/primary CPU IPIs the secondaries to
turn off IRQs, go into real mode and wait in kexec_wait.  While this
is happening, the primary tears down all the MMU maps.  Unfortunately
the primary doesn't check to make sure the secondaries have entered
real mode before doing this.

On PHYP machines, the secondaries can take a long time shutting down
the IRQ controller as RTAS calls are need.  These RTAS calls need to
be serialised which resilts in the secondaries contending in
lock_rtas() and hence taking a long time to shut down.

We've hit this on large POWER7 machines, where some secondaries are
still waiting in lock_rtas(), when the primary tears down the HPTEs.

This patch makes sure all secondaries are in real mode before the
primary tears down the MMU.  It uses the new kexec_state entry in the
paca.  It times out if the secondaries don't reach real mode after
10sec.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/crash.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index cca7c8fafc1c..8c066d6a8e4b 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -162,6 +162,32 @@ static void crash_kexec_prepare_cpus(int cpu)
 	/* Leave the IPI callback set */
 }
 
+/* wait for all the CPUs to hit real mode but timeout if they don't come in */
+static void crash_kexec_wait_realmode(int cpu)
+{
+	unsigned int msecs;
+	int i;
+
+	msecs = 10000;
+	for (i=0; i < NR_CPUS && msecs > 0; i++) {
+		if (i == cpu)
+			continue;
+
+		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+			barrier();
+			if (!cpu_possible(i)) {
+				break;
+			}
+			if (!cpu_online(i)) {
+				break;
+			}
+			msecs--;
+			mdelay(1);
+		}
+	}
+	mb();
+}
+
 /*
  * This function will be called by secondary cpus or by kexec cpu
  * if soft-reset is activated to stop some CPUs.
@@ -419,6 +445,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	crash_kexec_prepare_cpus(crashing_cpu);
 	cpu_set(crashing_cpu, cpus_in_crash);
 	crash_kexec_stop_spus();
+	crash_kexec_wait_realmode(crashing_cpu);
 	if (ppc_md.kexec_cpu_down)
 		ppc_md.kexec_cpu_down(1, 0);
 }
-- 
cgit v1.2.2


From dd04c63c96425af9b6741f3abf0ad25d6b1c0e8d Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 16 May 2010 20:01:28 +0000
Subject: powerpc: Remove check of ibm,smt-snooze-delay OF property

I'm not sure why we have code for parsing an ibm,smt-snooze-delay OF
property. Since we have a smt-snooze-delay= boot option and we can
also set it at runtime via sysfs, it should be safe to get rid of
this code.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/sysfs.c | 29 -----------------------------
 1 file changed, 29 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index e235e52dc4fe..158fb731e199 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -67,33 +67,6 @@ static ssize_t show_smt_snooze_delay(struct sys_device *dev,
 static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
 		   store_smt_snooze_delay);
 
-/* Only parse OF options if the matching cmdline option was not specified */
-static int smt_snooze_cmdline;
-
-static int __init smt_setup(void)
-{
-	struct device_node *options;
-	const unsigned int *val;
-	unsigned int cpu;
-
-	if (!cpu_has_feature(CPU_FTR_SMT))
-		return -ENODEV;
-
-	options = of_find_node_by_path("/options");
-	if (!options)
-		return -ENODEV;
-
-	val = of_get_property(options, "ibm,smt-snooze-delay", NULL);
-	if (!smt_snooze_cmdline && val) {
-		for_each_possible_cpu(cpu)
-			per_cpu(smt_snooze_delay, cpu) = *val;
-	}
-
-	of_node_put(options);
-	return 0;
-}
-__initcall(smt_setup);
-
 static int __init setup_smt_snooze_delay(char *str)
 {
 	unsigned int cpu;
@@ -102,8 +75,6 @@ static int __init setup_smt_snooze_delay(char *str)
 	if (!cpu_has_feature(CPU_FTR_SMT))
 		return 1;
 
-	smt_snooze_cmdline = 1;
-
 	if (get_option(&str, &snooze)) {
 		for_each_possible_cpu(cpu)
 			per_cpu(smt_snooze_delay, cpu) = snooze;
-- 
cgit v1.2.2


From b878dc00595440586874952dd85ce9b803360b87 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 16 May 2010 20:02:39 +0000
Subject: powerpc: Use smt_snooze_delay=-1 to always busy loop

Right now if we want to busy loop and not give up any time to the hypervisor
we put a very large value into smt_snooze_delay. This is sometimes useful
when running a single partition and you want to avoid any latencies due
to the hypervisor or CPU power state transitions. While this works, it's a bit
ugly - how big a number is enough now we have NO_HZ and can be idle for a very
long time.

The patch below makes smt_snooze_delay signed, and a negative value means loop
forever:

echo -1 > /sys/devices/system/cpu/cpu0/smt_snooze_delay

This change shouldn't affect the existing userspace tools (eg ppc64_cpu), but
I'm cc-ing Nathan just to be sure.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/sysfs.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 158fb731e199..c0d8c2006bf4 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -35,7 +35,7 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices);
 #ifdef CONFIG_PPC64
 
 /* Time in microseconds we delay before sleeping in the idle loop */
-DEFINE_PER_CPU(unsigned long, smt_snooze_delay) = { 100 };
+DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
 
 static ssize_t store_smt_snooze_delay(struct sys_device *dev,
 				      struct sysdev_attribute *attr,
@@ -44,9 +44,9 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev,
 {
 	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
 	ssize_t ret;
-	unsigned long snooze;
+	long snooze;
 
-	ret = sscanf(buf, "%lu", &snooze);
+	ret = sscanf(buf, "%ld", &snooze);
 	if (ret != 1)
 		return -EINVAL;
 
@@ -61,7 +61,7 @@ static ssize_t show_smt_snooze_delay(struct sys_device *dev,
 {
 	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
 
-	return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id));
+	return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->sysdev.id));
 }
 
 static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
@@ -70,15 +70,14 @@ static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
 static int __init setup_smt_snooze_delay(char *str)
 {
 	unsigned int cpu;
-	int snooze;
+	long snooze;
 
 	if (!cpu_has_feature(CPU_FTR_SMT))
 		return 1;
 
-	if (get_option(&str, &snooze)) {
-		for_each_possible_cpu(cpu)
-			per_cpu(smt_snooze_delay, cpu) = snooze;
-	}
+	snooze = simple_strtol(str, NULL, 10);
+	for_each_possible_cpu(cpu)
+		per_cpu(smt_snooze_delay, cpu) = snooze;
 
 	return 1;
 }
-- 
cgit v1.2.2


From 99ec28f183daa450faa7bdad6f932364ae325648 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Sun, 9 May 2010 17:39:05 +0000
Subject: powerpc: Remove unused 'protect4gb' boot parameter

'protect4gb' boot parameter was introduced to avoid allocating dma
space acrossing 4GB boundary in 2007 (the commit
569975591c5530fdc9c7a3c45122e5e46f075a74).

In 2008, the IOMMU was fixed to use the boundary_mask parameter per
device properly. So 'protect4gb' workaround was removed (the
383af9525bb27f927511874f6306247ec13f1c28). But somehow I messed the
'protect4gb' boot parameter that was used to enable the
workaround.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/iommu.c | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index ec94f906ea43..d5839179ec77 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -43,20 +43,9 @@
 #define DBG(...)
 
 static int novmerge;
-static int protect4gb = 1;
 
 static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
 
-static int __init setup_protect4gb(char *str)
-{
-	if (strcmp(str, "on") == 0)
-		protect4gb = 1;
-	else if (strcmp(str, "off") == 0)
-		protect4gb = 0;
-
-	return 1;
-}
-
 static int __init setup_iommu(char *str)
 {
 	if (!strcmp(str, "novmerge"))
@@ -66,7 +55,6 @@ static int __init setup_iommu(char *str)
 	return 1;
 }
 
-__setup("protect4gb=", setup_protect4gb);
 __setup("iommu=", setup_iommu);
 
 static unsigned long iommu_range_alloc(struct device *dev,
-- 
cgit v1.2.2