From 578b7cd1518f8d1b17a7fb1671d3d756c9cb49f1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 7 Apr 2010 14:44:28 +1000 Subject: powerpc/vio: Add modalias support BenH: Added to vio_cmo_dev_attrs as well Provide a modalias entry for VIO devices in sysfs. I believe this was another initrd generation bugfix for anaconda. Signed-off-by: David Woodhouse Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 82237176a2a3..2f57956714bd 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -958,9 +958,12 @@ viodev_cmo_rd_attr(allocated); static ssize_t name_show(struct device *, struct device_attribute *, char *); static ssize_t devspec_show(struct device *, struct device_attribute *, char *); +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf); static struct device_attribute vio_cmo_dev_attrs[] = { __ATTR_RO(name), __ATTR_RO(devspec), + __ATTR_RO(modalias), __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, viodev_cmo_desired_show, viodev_cmo_desired_set), __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL), @@ -1320,9 +1323,27 @@ static ssize_t devspec_show(struct device *dev, return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none"); } +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + struct device_node *dn; + const char *cp; + + dn = dev->archdata.of_node; + if (!dn) + return -ENODEV; + cp = of_get_property(dn, "compatible", NULL); + if (!cp) + return -ENODEV; + + return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp); +} + static struct device_attribute vio_dev_attrs[] = { __ATTR_RO(name), __ATTR_RO(devspec), + __ATTR_RO(modalias), __ATTR_NULL }; -- cgit v1.2.2 From 9c7cc234dc5edf5379fbbab4973f6704f59bc57b Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 29 Mar 2010 23:59:25 +0000 Subject: powerpc: Disable interrupts for data breakpoint exceptions Data address breakpoint exceptions are currently handled along with page-faults which require interrupts to remain in enabled state. Since exception handling for data breakpoints aren't pre-empt safe, we handle them separately. Signed-off-by: K.Prasad Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e3be98ffe2a7..3e423fbad6bc 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -735,8 +735,11 @@ _STATIC(do_hash_page) std r3,_DAR(r1) std r4,_DSISR(r1) - andis. r0,r4,0xa450 /* weird error? */ + andis. r0,r4,0xa410 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ + andis. r0,r4,DSISR_DABRMATCH@h + bne- handle_dabr_fault + BEGIN_FTR_SECTION andis. r0,r4,0x0020 /* Is it a segment table fault? */ bne- do_ste_alloc /* If so handle it */ @@ -823,6 +826,14 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) bl .raw_local_irq_restore b 11f +/* We have a data breakpoint exception - handle it */ +handle_dabr_fault: + ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_dabr + b .ret_from_except_lite + /* Here we have a page fault that hash_page can't handle. */ handle_page_fault: ENABLE_INTS -- cgit v1.2.2 From 6fe9d1facb5346a615f9b571df3b91593afb29c3 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Wed, 31 Mar 2010 21:39:24 +0000 Subject: powerpc/pseries: Export data from new hcall H_EM_GET_PARMS Add support for H_EM_GET_PARMS hcall that will return data related to power modes from the platform. Export the data directly to user space for administrative tools to interpret and use. cat /proc/powerpc/lparcfg will export power mode data Signed-off-by: Vaidyanathan Srinivasan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/lparcfg.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index c2c70e1b32cd..50362b6ef6e9 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -38,7 +38,7 @@ #include #include -#define MODULE_VERS "1.8" +#define MODULE_VERS "1.9" #define MODULE_NAME "lparcfg" /* #define LPARCFG_DEBUG */ @@ -487,6 +487,14 @@ static void splpar_dispatch_data(struct seq_file *m) seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions); } +static void parse_em_data(struct seq_file *m) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) + seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); +} + static int pseries_lparcfg_data(struct seq_file *m, void *v) { int partition_potential_processors; @@ -541,6 +549,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "slb_size=%d\n", mmu_slb_size); + parse_em_data(m); + return 0; } -- cgit v1.2.2 From fe1691e3f49d41452832f5aee2b952bd201ccab1 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 2 Mar 2010 05:37:09 +0000 Subject: powerpc/8xx: Optimze TLB Miss handlers This removes a couple of insn's from the TLB Miss handlers whithout changing functionality. Signed-off-by: Joakim Tjernlund Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_8xx.S | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 3ef743fa5d7c..ecc4a02277e3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -343,17 +343,14 @@ InstructionTLBMiss: cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT bne- cr0, 2f - /* Clear PP lsb, 0x400 */ - rlwinm r10, r10, 0, 22, 20 - /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 22 and 28 must be clear. + * Software indicator bits 21 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ li r11, 0x00f0 - rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ DO_8xx_CPU6(0x2d80, r3) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ @@ -444,9 +441,7 @@ DataStoreTLBMiss: /* Honour kernel RO, User NA */ /* 0x200 == Extended encoding, bit 22 */ - /* r11 = (r10 & _PAGE_USER) >> 2 */ - rlwinm r11, r10, 32-2, 0x200 - or r10, r11, r10 + rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */ /* r11 = (r10 & _PAGE_RW) >> 1 */ rlwinm r11, r10, 32-1, 0x200 or r10, r11, r10 -- cgit v1.2.2 From 4afb0be7ccda0ca551cc37572bab74ba4a3c18dd Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 2 Mar 2010 05:37:10 +0000 Subject: powerpc/8xx: Avoid testing for kernel space in ITLB Miss. Only modules will cause ITLB Misses as we always pin the first 8MB of kernel memory. Signed-off-by: Joakim Tjernlund Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_8xx.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ecc4a02277e3..84ca1d9b9ed3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -318,12 +318,16 @@ InstructionTLBMiss: /* If we are faulting a kernel address, we have to use the * kernel page tables. */ +#ifdef CONFIG_MODULES + /* Only modules will cause ITLB Misses as we always + * pin the first 8MB of kernel memory */ andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ beq 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l rlwimi r10, r11, 0, 2, 19 3: +#endif lwz r11, 0(r10) /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ -- cgit v1.2.2 From d069cb4373fe0d451357c4d3769623a7564dfa9f Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 2 Mar 2010 05:37:11 +0000 Subject: powerpc/8xx: Don't touch ACCESSED when no SWAP. Only the swap function cares about the ACCESSED bit in the pte. Do not waste cycles updateting ACCESSED when swap is not compiled into the kernel. Signed-off-by: Joakim Tjernlund Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_8xx.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 84ca1d9b9ed3..6478a9632552 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -343,10 +343,11 @@ InstructionTLBMiss: mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ lwz r10, 0(r11) /* Get the pte */ +#ifdef CONFIG_SWAP andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT bne- cr0, 2f - +#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 21 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be @@ -439,10 +440,11 @@ DataStoreTLBMiss: * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); * r10 = (r10 & ~PRESENT) | r11; */ +#ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT - +#endif /* Honour kernel RO, User NA */ /* 0x200 == Extended encoding, bit 22 */ rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */ -- cgit v1.2.2 From 469d62be9263b92f2c3329540cbb1c076111f4f3 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 2 Mar 2010 05:37:12 +0000 Subject: powerpc/8xx: Use SPRG2 and DAR registers to stash r11 and cr. This avoids storing these registers in memory. CPU6 errata will still use the old way. Remove some G2 leftover accesses from 2.4 Signed-off-by: Joakim Tjernlund Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_8xx.S | 49 +++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 6478a9632552..1f1a04b5c2a4 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -71,9 +71,6 @@ _ENTRY(_start); * in the first level table, but that would require many changes to the * Linux page directory/table functions that I don't want to do right now. * - * I used to use SPRG2 for a temporary register in the TLB handler, but it - * has since been put to other uses. I now use a hack to save a register - * and the CCR at memory location 0.....Someday I'll fix this..... * -- Dan */ .globl __start @@ -302,8 +299,13 @@ InstructionTLBMiss: DO_8xx_CPU6(0x3f80, r3) mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ mfcr r10 +#ifdef CONFIG_8xx_CPU6 stw r10, 0(r0) stw r11, 4(r0) +#else + mtspr SPRN_DAR, r10 + mtspr SPRN_SPRG2, r11 +#endif mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 addi r11, r10, 0x1000 @@ -359,13 +361,19 @@ InstructionTLBMiss: DO_8xx_CPU6(0x2d80, r3) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ - mfspr r10, SPRN_M_TW /* Restore registers */ + /* Restore registers */ +#ifndef CONFIG_8xx_CPU6 + mfspr r10, SPRN_DAR + mtcr r10 + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_SPRG2 +#else lwz r11, 0(r0) mtcr r11 lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif + mfspr r10, SPRN_M_TW rfi 2: mfspr r11, SPRN_SRR1 @@ -375,13 +383,20 @@ InstructionTLBMiss: rlwinm r11, r11, 0, 0xffff mtspr SPRN_SRR1, r11 - mfspr r10, SPRN_M_TW /* Restore registers */ + /* Restore registers */ +#ifndef CONFIG_8xx_CPU6 + mfspr r10, SPRN_DAR + mtcr r10 + li r11, 0x00f0 + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_SPRG2 +#else lwz r11, 0(r0) mtcr r11 lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif + mfspr r10, SPRN_M_TW b InstructionAccess . = 0x1200 @@ -392,8 +407,13 @@ DataStoreTLBMiss: DO_8xx_CPU6(0x3f80, r3) mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ mfcr r10 +#ifdef CONFIG_8xx_CPU6 stw r10, 0(r0) stw r11, 4(r0) +#else + mtspr SPRN_DAR, r10 + mtspr SPRN_SPRG2, r11 +#endif mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the @@ -461,18 +481,24 @@ DataStoreTLBMiss: * of the MMU. */ 2: li r11, 0x00f0 - mtspr SPRN_DAR,r11 /* Tag DAR */ rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ DO_8xx_CPU6(0x3d80, r3) mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - mfspr r10, SPRN_M_TW /* Restore registers */ + /* Restore registers */ +#ifndef CONFIG_8xx_CPU6 + mfspr r10, SPRN_DAR + mtcr r10 + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_SPRG2 +#else + mtspr SPRN_DAR, r11 /* Tag DAR */ lwz r11, 0(r0) mtcr r11 lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif + mfspr r10, SPRN_M_TW rfi /* This is an instruction TLB error on the MPC8xx. This could be due @@ -684,9 +710,6 @@ start_here: tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ mtspr SPRN_SPRG_THREAD,r4 - li r3,0 - /* XXX What is that for ? SPRG2 appears otherwise unused on 8xx */ - mtspr SPRN_SPRG2,r3 /* 0 => r1 has kernel sp */ /* stack */ lis r1,init_thread_union@ha -- cgit v1.2.2 From f6d8c8bb1d360272d795927d39f3d2c5934e77d9 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 29 Mar 2010 05:33:34 +0000 Subject: powerpc/vio: Add missing unlock in error path Add an unlock before exiting the function. A simplified version of the semantic patch that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ expression E1; identifier f; @@ f (...) { <+... * spin_lock_irq (E1,...); ... when != E1 * return ...; ...+> } // Signed-off-by: Julia Lawall Acked-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 2f57956714bd..2a3428bef83a 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -645,8 +645,10 @@ void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) found = 1; break; } - if (!found) + if (!found) { + spin_unlock_irqrestore(&vio_cmo.lock, flags); return; + } /* Increase/decrease in desired device entitlement */ if (desired >= viodev->cmo.desired) { -- cgit v1.2.2 From 21dbeb91a24d867af0e98ba155bfa80d2906344f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 26 Mar 2010 12:03:29 +0000 Subject: powerpc: Use set_cpus_allowed_ptr Use set_cpus_allowed_ptr rather than set_cpus_allowed. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression E1,E2; @@ - set_cpus_allowed(E1, cpumask_of_cpu(E2)) + set_cpus_allowed_ptr(E1, cpumask_of(E2)) @@ expression E; identifier I; @@ - set_cpus_allowed(E, I) + set_cpus_allowed_ptr(E, &I) // Signed-off-by: Julia Lawall Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index c2ee14498077..e36f94f7411a 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -561,12 +561,12 @@ void __init smp_cpus_done(unsigned int max_cpus) * se we pin us down to CPU 0 for a short while */ old_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid)); + set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) smp_ops->setup_cpu(boot_cpuid); - set_cpus_allowed(current, old_mask); + set_cpus_allowed_ptr(current, &old_mask); snapshot_timebases(); -- cgit v1.2.2 From e9bbc8cde0e3c33b42ddbe1b02108cb5c97275eb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 18 Feb 2010 12:11:51 +0000 Subject: powerpc/pseries: Call ibm,os-term if the ibm,extended-os-term is present We have had issues in the past with ibm,os-term initiating shutdown of a partition. This is confusing to the user, especially if panic_timeout is non zero. The temporary fix was to avoid calling ibm,os-term if a panic_timeout was set and since we set it on every boot we basically never call ibm,os-term. An extended version of ibm,os-term has since been implemented which gives us the behaviour we want: "When the platform supports extended ibm,os-term behavior, the return to the RTAS will always occur unless there is a kernel assisted dump active as initiated by an ibm,configure-kernel-dump call." This patch checks for the ibm,extended-os-term property and calls ibm,os-term if it exists. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtas.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 74367841615a..0e1ec6f746f6 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -691,10 +691,14 @@ void rtas_os_term(char *str) { int status; - if (panic_timeout) - return; - - if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term")) + /* + * Firmware with the ibm,extended-os-term property is guaranteed + * to always return from an ibm,os-term call. Earlier versions without + * this property may terminate the partition which we want to avoid + * since it interferes with panic_timeout. + */ + if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") || + RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term")) return; snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); @@ -705,8 +709,7 @@ void rtas_os_term(char *str) } while (rtas_busy_delay(status)); if (status != 0) - printk(KERN_EMERG "ibm,os-term call failed %d\n", - status); + printk(KERN_EMERG "ibm,os-term call failed %d\n", status); } static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; -- cgit v1.2.2 From a32aaf14513da776556ad9995de8d83cd76ae60a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 7 Apr 2010 18:09:15 +1000 Subject: powerpc/vio: Add power management support Adds support for suspend/resume for VIO devices. This is needed for support for HMC initiated hibernation. Signed-off-by: Brian King Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 2a3428bef83a..b8e311d64ad5 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1381,6 +1381,29 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env) return 0; } +static int vio_pm_suspend(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + if (pm && pm->suspend) + return pm->suspend(dev); + return 0; +} + +static int vio_pm_resume(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + if (pm && pm->resume) + return pm->resume(dev); + return 0; +} + +const struct dev_pm_ops vio_dev_pm_ops = { + .suspend = vio_pm_suspend, + .resume = vio_pm_resume, +}; + static struct bus_type vio_bus_type = { .name = "vio", .dev_attrs = vio_dev_attrs, @@ -1388,6 +1411,7 @@ static struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, + .pm = &vio_dev_pm_ops, }; /** -- cgit v1.2.2 From 359e4284a3f37aba7fd06d993863de2509d86f54 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 7 Apr 2010 18:10:20 +1000 Subject: powerpc: Add kprobe-based event tracer This patch ports the kprobe-based event tracer to powerpc. This patch is based on x86 port. This brings powerpc on par with x86. Signed-off-by: Mahesh Salgaonkar Acked-by: Masami Hiramatsu Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ptrace.c | 103 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index ed2cfe17d25e..7a0c0199ea28 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -38,6 +38,109 @@ #include #include +/* + * The parameter save area on the stack is used to store arguments being passed + * to callee function and is located at fixed offset from stack pointer. + */ +#ifdef CONFIG_PPC32 +#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */ +#else /* CONFIG_PPC32 */ +#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */ +#endif + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define STR(s) #s /* convert to string */ +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define GPR_OFFSET_NAME(num) \ + {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + GPR_OFFSET_NAME(31), + REG_OFFSET_NAME(nip), + REG_OFFSET_NAME(msr), + REG_OFFSET_NAME(ctr), + REG_OFFSET_NAME(link), + REG_OFFSET_NAME(xer), + REG_OFFSET_NAME(ccr), +#ifdef CONFIG_PPC64 + REG_OFFSET_NAME(softe), +#else + REG_OFFSET_NAME(mq), +#endif + REG_OFFSET_NAME(trap), + REG_OFFSET_NAME(dar), + REG_OFFSET_NAME(dsisr), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. -- cgit v1.2.2 From 963cf3dc6342fe60bb78c615884537621abca0bc Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 19 Feb 2010 11:00:40 +0100 Subject: KVM: PPC: Add helpers to call FPU instructions To emulate paired single instructions, we need to be able to call FPU operations from within the kernel. Since we don't want gcc to spill arbitrary FPU code everywhere, we tell it to use a soft fpu. Since we know we can really call the FPU in safe areas, let's also add some calls that we can later use to actually execute real world FPU operations on the host's FPU. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/ppc_ksyms.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ab3e392ac63c..58fdb3a784de 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -101,6 +101,8 @@ EXPORT_SYMBOL(pci_dram_offset); EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(kernel_thread); +EXPORT_SYMBOL_GPL(cvt_df); +EXPORT_SYMBOL_GPL(cvt_fd); EXPORT_SYMBOL(giveup_fpu); #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); -- cgit v1.2.2 From 4b83c330b4d38e869111bda6e9077d4f61ed974a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 7 Apr 2010 15:33:44 +0000 Subject: powerpc/numa: Add form 1 NUMA affinity Firmware changed the way it represents memory and cpu affinity on POWER7. Unfortunately the old method now caps the topology to work around issues with legacy operating systems. For Linux to get the correct topology we need to use the new form 1 affinity information. We set the form 1 field in the client architecture, and if we see "1" in the ibm,associativity-form property firmware supports form 1 affinity and we should look at the first field in the ibm,associativity-reference-points array. If not we use the second field as we always have. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 5f306c4946e5..97d4bd9442d3 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -653,6 +653,7 @@ static void __init early_cmdline_parse(void) #else #define OV5_CMO 0x00 #endif +#define OV5_TYPE1_AFFINITY 0x80 /* Type 1 NUMA affinity */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ @@ -706,7 +707,7 @@ static unsigned char ibm_architecture_vec[] = { OV5_DONATE_DEDICATE_CPU | OV5_MSI, 0, OV5_CMO, - 0, + OV5_TYPE1_AFFINITY, 0, 0, 0, -- cgit v1.2.2 From b8b14c66765ccba884c5c4570bf8be361d211d95 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Mon, 3 May 2010 07:36:22 -0500 Subject: powerpc/swiotlb: Fix off by one in determining boundary of which ops to use When we compare the devices DMA mask to the amount of memory we need to make sure we treat the DMA mask as an address boundary. For example if the DMA_MASK(32) and we have 4G of memory we'd incorrectly set the dma ops to swiotlb. We need to add one to the dma mask when we convert it. Signed-off-by: Kumar Gala --- arch/powerpc/kernel/dma-swiotlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 59c928564a03..4ff4da2c238b 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -1,7 +1,8 @@ /* * Contains routines needed to support swiotlb for ppc. * - * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor + * Copyright (C) 2009-2010 Freescale Semiconductor, Inc. + * Author: Becky Bruce * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -70,7 +71,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb, sd->max_direct_dma_addr = 0; /* May need to bounce if the device can't address all of DRAM */ - if (dma_get_mask(dev) < lmb_end_of_DRAM()) + if ((dma_get_mask(dev) + 1) < lmb_end_of_DRAM()) set_dma_ops(dev, &swiotlb_dma_ops); return NOTIFY_DONE; -- cgit v1.2.2 From 471c70ff39809af783c7718defe574a9ba81dd26 Mon Sep 17 00:00:00 2001 From: Torez Smith Date: Fri, 5 Mar 2010 10:43:01 +0000 Subject: powerpc/booke: Add Stack Marking support to Booke Exception Prolog This patch adds a marker to the exception stack frame to aid in debugging. It's already inserted on other platforms and xmon recognizes it and identifies exception frames when showing stack traces. Signed-off-by: Torez Smith Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/head_booke.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 50504ae39cb7..a0bf158c8b47 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -1,6 +1,7 @@ #ifndef __HEAD_BOOKE_H__ #define __HEAD_BOOKE_H__ +#include /* for STACK_FRAME_REGS_MARKER */ /* * Macros used for common Book-e exception handling */ @@ -48,6 +49,9 @@ stw r10,0(r11); \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ stw r0,GPR0(r11); \ + lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \ + addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ + stw r10, 8(r11); \ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) -- cgit v1.2.2 From 795033c344d88dc6aa5106d0cc358656f29bd722 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 5 Mar 2010 10:43:07 +0000 Subject: powerpc/44x: break out cpu init code into stand-alone function The 47x platform supports multiple cores and shares code with 44x. Break out code that is common for initializing the primary and secondary cpus into a function which can be called for both. Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/head_44x.S | 330 +++++++++++++++++++++-------------------- 1 file changed, 171 insertions(+), 159 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 711368b993f2..39be049a7850 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -69,165 +69,7 @@ _ENTRY(_start); mr r27,r7 li r24,0 /* CPU number */ -/* - * In case the firmware didn't do it, we apply some workarounds - * that are good for all 440 core variants here - */ - mfspr r3,SPRN_CCR0 - rlwinm r3,r3,0,0,27 /* disable icache prefetch */ - isync - mtspr SPRN_CCR0,r3 - isync - sync - -/* - * Set up the initial MMU state - * - * We are still executing code at the virtual address - * mappings set by the firmware for the base of RAM. - * - * We first invalidate all TLB entries but the one - * we are running from. We then load the KERNELBASE - * mappings so we can begin to use kernel addresses - * natively and so the interrupt vector locations are - * permanently pinned (necessary since Book E - * implementations always have translation enabled). - * - * TODO: Use the known TLB entry we are running from to - * determine which physical region we are located - * in. This can be used to determine where in RAM - * (on a shared CPU system) or PCI memory space - * (on a DRAMless system) we are located. - * For now, we assume a perfect world which means - * we are located at the base of DRAM (physical 0). - */ - -/* - * Search TLB for entry that we are currently using. - * Invalidate all entries but the one we are using. - */ - /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ - mfspr r3,SPRN_PID /* Get PID */ - mfmsr r4 /* Get MSR */ - andi. r4,r4,MSR_IS@l /* TS=1? */ - beq wmmucr /* If not, leave STS=0 */ - oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */ -wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ - sync - - bl invstr /* Find our address */ -invstr: mflr r5 /* Make it accessible */ - tlbsx r23,0,r5 /* Find entry we are in */ - li r4,0 /* Start at TLB entry 0 */ - li r3,0 /* Set PAGEID inval value */ -1: cmpw r23,r4 /* Is this our entry? */ - beq skpinv /* If so, skip the inval */ - tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ -skpinv: addi r4,r4,1 /* Increment */ - cmpwi r4,64 /* Are we done? */ - bne 1b /* If not, repeat */ - isync /* If so, context change */ - -/* - * Configure and load pinned entry into TLB slot 63. - */ - - lis r3,PAGE_OFFSET@h - ori r3,r3,PAGE_OFFSET@l - - /* Kernel is at the base of RAM */ - li r4, 0 /* Load the kernel physical address */ - - /* Load the kernel PID = 0 */ - li r0,0 - mtspr SPRN_PID,r0 - sync - - /* Initialize MMUCR */ - li r5,0 - mtspr SPRN_MMUCR,r5 - sync - - /* pageid fields */ - clrrwi r3,r3,10 /* Mask off the effective page number */ - ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M - - /* xlat fields */ - clrrwi r4,r4,10 /* Mask off the real page number */ - /* ERPN is 0 for first 4GB page */ - - /* attrib fields */ - /* Added guarded bit to protect against speculative loads/stores */ - li r5,0 - ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) - - li r0,63 /* TLB slot 63 */ - - tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */ - tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */ - tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ - - /* Force context change */ - mfmsr r0 - mtspr SPRN_SRR1, r0 - lis r0,3f@h - ori r0,r0,3f@l - mtspr SPRN_SRR0,r0 - sync - rfi - - /* If necessary, invalidate original entry we used */ -3: cmpwi r23,63 - beq 4f - li r6,0 - tlbwe r6,r23,PPC44x_TLB_PAGEID - isync - -4: -#ifdef CONFIG_PPC_EARLY_DEBUG_44x - /* Add UART mapping for early debug. */ - - /* pageid fields */ - lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h - ori r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K - - /* xlat fields */ - lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h - ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH - - /* attrib fields */ - li r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G) - li r0,62 /* TLB slot 0 */ - - tlbwe r3,r0,PPC44x_TLB_PAGEID - tlbwe r4,r0,PPC44x_TLB_XLAT - tlbwe r5,r0,PPC44x_TLB_ATTRIB - - /* Force context change */ - isync -#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ - - /* Establish the interrupt vector offsets */ - SET_IVOR(0, CriticalInput); - SET_IVOR(1, MachineCheck); - SET_IVOR(2, DataStorage); - SET_IVOR(3, InstructionStorage); - SET_IVOR(4, ExternalInput); - SET_IVOR(5, Alignment); - SET_IVOR(6, Program); - SET_IVOR(7, FloatingPointUnavailable); - SET_IVOR(8, SystemCall); - SET_IVOR(9, AuxillaryProcessorUnavailable); - SET_IVOR(10, Decrementer); - SET_IVOR(11, FixedIntervalTimer); - SET_IVOR(12, WatchdogTimer); - SET_IVOR(13, DataTLBError); - SET_IVOR(14, InstructionTLBError); - SET_IVOR(15, DebugCrit); - - /* Establish the interrupt vector base */ - lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ - mtspr SPRN_IVPR,r4 + bl init_cpu_state /* * This is where the main kernel code starts. @@ -646,6 +488,176 @@ _GLOBAL(set_context) isync /* Force context change */ blr +/* + * Init CPU state. This is called at boot time or for secondary CPUs + * to setup initial TLB entries, setup IVORs, etc... + */ +_GLOBAL(init_cpu_state) + mflr r22 +/* + * In case the firmware didn't do it, we apply some workarounds + * that are good for all 440 core variants here + */ + mfspr r3,SPRN_CCR0 + rlwinm r3,r3,0,0,27 /* disable icache prefetch */ + isync + mtspr SPRN_CCR0,r3 + isync + sync + +/* + * Set up the initial MMU state + * + * We are still executing code at the virtual address + * mappings set by the firmware for the base of RAM. + * + * We first invalidate all TLB entries but the one + * we are running from. We then load the KERNELBASE + * mappings so we can begin to use kernel addresses + * natively and so the interrupt vector locations are + * permanently pinned (necessary since Book E + * implementations always have translation enabled). + * + * TODO: Use the known TLB entry we are running from to + * determine which physical region we are located + * in. This can be used to determine where in RAM + * (on a shared CPU system) or PCI memory space + * (on a DRAMless system) we are located. + * For now, we assume a perfect world which means + * we are located at the base of DRAM (physical 0). + */ + +/* + * Search TLB for entry that we are currently using. + * Invalidate all entries but the one we are using. + */ + /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ + mfspr r3,SPRN_PID /* Get PID */ + mfmsr r4 /* Get MSR */ + andi. r4,r4,MSR_IS@l /* TS=1? */ + beq wmmucr /* If not, leave STS=0 */ + oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */ +wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ + sync + + bl invstr /* Find our address */ +invstr: mflr r5 /* Make it accessible */ + tlbsx r23,0,r5 /* Find entry we are in */ + li r4,0 /* Start at TLB entry 0 */ + li r3,0 /* Set PAGEID inval value */ +1: cmpw r23,r4 /* Is this our entry? */ + beq skpinv /* If so, skip the inval */ + tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ +skpinv: addi r4,r4,1 /* Increment */ + cmpwi r4,64 /* Are we done? */ + bne 1b /* If not, repeat */ + isync /* If so, context change */ + +/* + * Configure and load pinned entry into TLB slot 63. + */ + + lis r3,PAGE_OFFSET@h + ori r3,r3,PAGE_OFFSET@l + + /* Kernel is at the base of RAM */ + li r4, 0 /* Load the kernel physical address */ + + /* Load the kernel PID = 0 */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Initialize MMUCR */ + li r5,0 + mtspr SPRN_MMUCR,r5 + sync + + /* pageid fields */ + clrrwi r3,r3,10 /* Mask off the effective page number */ + ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M + + /* xlat fields */ + clrrwi r4,r4,10 /* Mask off the real page number */ + /* ERPN is 0 for first 4GB page */ + + /* attrib fields */ + /* Added guarded bit to protect against speculative loads/stores */ + li r5,0 + ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) + + li r0,63 /* TLB slot 63 */ + + tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */ + tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */ + tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ + + /* Force context change */ + mfmsr r0 + mtspr SPRN_SRR1, r0 + lis r0,3f@h + ori r0,r0,3f@l + mtspr SPRN_SRR0,r0 + sync + rfi + + /* If necessary, invalidate original entry we used */ +3: cmpwi r23,63 + beq 4f + li r6,0 + tlbwe r6,r23,PPC44x_TLB_PAGEID + isync + +4: +#ifdef CONFIG_PPC_EARLY_DEBUG_44x + /* Add UART mapping for early debug. */ + + /* pageid fields */ + lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h + ori r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K + + /* xlat fields */ + lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h + ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH + + /* attrib fields */ + li r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G) + li r0,62 /* TLB slot 0 */ + + tlbwe r3,r0,PPC44x_TLB_PAGEID + tlbwe r4,r0,PPC44x_TLB_XLAT + tlbwe r5,r0,PPC44x_TLB_ATTRIB + + /* Force context change */ + isync +#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ + + /* Establish the interrupt vector offsets */ + SET_IVOR(0, CriticalInput); + SET_IVOR(1, MachineCheck); + SET_IVOR(2, DataStorage); + SET_IVOR(3, InstructionStorage); + SET_IVOR(4, ExternalInput); + SET_IVOR(5, Alignment); + SET_IVOR(6, Program); + SET_IVOR(7, FloatingPointUnavailable); + SET_IVOR(8, SystemCall); + SET_IVOR(9, AuxillaryProcessorUnavailable); + SET_IVOR(10, Decrementer); + SET_IVOR(11, FixedIntervalTimer); + SET_IVOR(12, WatchdogTimer); + SET_IVOR(13, DataTLBError); + SET_IVOR(14, InstructionTLBError); + SET_IVOR(15, DebugCrit); + + /* Establish the interrupt vector base */ + lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ + mtspr SPRN_IVPR,r4 + + addis r22,r22,KERNELBASE@h + mtlr r22 + blr + /* * We put a few things here that have to be page-aligned. This stuff * goes at the beginning of the data segment, which is page-aligned. -- cgit v1.2.2 From e7f75ad01d590243904c2d95ab47e6b2e9ef6dad Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 5 Mar 2010 10:43:12 +0000 Subject: powerpc/47x: Base ppc476 support This patch adds the base support for the 476 processor. The code was primarily written by Ben Herrenschmidt and Torez Smith, but I've been maintaining it for a while. The goal is to have a single binary that will run on 44x and 47x, but we still have some details to work out. The biggest is that the L1 cache line size differs on the two platforms, but it's currently a compile-time option. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Torez Smith Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cputable.c | 13 ++ arch/powerpc/kernel/entry_32.S | 5 + arch/powerpc/kernel/head_44x.S | 502 +++++++++++++++++++++++++++++++++++++++-- arch/powerpc/kernel/misc_32.S | 9 +- arch/powerpc/kernel/smp.c | 8 + 5 files changed, 519 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 8af4949434b2..a1d845839727 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1701,6 +1701,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, + { /* 476 core */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x11a50000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .platform = "ppc470", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1175a8539e6c..ed4aeb96398b 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -373,11 +373,13 @@ syscall_exit_cont: bnel- load_dbcr0 #endif #ifdef CONFIG_44x +BEGIN_MMU_FTR_SECTION lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 bne- 2f 1: +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x) #endif /* CONFIG_44x */ BEGIN_FTR_SECTION lwarx r7,0,r1 @@ -848,6 +850,9 @@ resume_kernel: /* interrupts are hard-disabled at this point */ restore: #ifdef CONFIG_44x +BEGIN_MMU_FTR_SECTION + b 1f +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 39be049a7850..1acd175428c4 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -37,6 +37,7 @@ #include #include #include +#include #include "head_booke.h" @@ -191,7 +192,7 @@ interrupt_base: #endif /* Data TLB Error Interrupt */ - START_EXCEPTION(DataTLBError) + START_EXCEPTION(DataTLBError44x) mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_WSCRATCH1, r11 mtspr SPRN_SPRG_WSCRATCH2, r12 @@ -282,7 +283,7 @@ tlb_44x_patch_hwater_D: mfspr r10,SPRN_DEAR /* Jump to common tlb load */ - b finish_tlb_load + b finish_tlb_load_44x 2: /* The bailout. Restore registers to pre-exception conditions @@ -302,7 +303,7 @@ tlb_44x_patch_hwater_D: * information from different registers and bailout * to a different point. */ - START_EXCEPTION(InstructionTLBError) + START_EXCEPTION(InstructionTLBError44x) mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_WSCRATCH1, r11 mtspr SPRN_SPRG_WSCRATCH2, r12 @@ -378,7 +379,7 @@ tlb_44x_patch_hwater_I: mfspr r10,SPRN_SRR0 /* Jump to common TLB load point */ - b finish_tlb_load + b finish_tlb_load_44x 2: /* The bailout. Restore registers to pre-exception conditions @@ -392,15 +393,7 @@ tlb_44x_patch_hwater_I: mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage - /* Debug Interrupt */ - DEBUG_CRIT_EXCEPTION - -/* - * Local functions - */ - /* - * Both the instruction and data TLB miss get to this * point to load the TLB. * r10 - EA of fault @@ -410,7 +403,7 @@ tlb_44x_patch_hwater_I: * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ -finish_tlb_load: +finish_tlb_load_44x: /* Combine RPN & ERPN an write WS 0 */ rlwimi r11,r12,0,0,31-PAGE_SHIFT tlbwe r11,r13,PPC44x_TLB_XLAT @@ -443,6 +436,227 @@ finish_tlb_load: mfspr r10, SPRN_SPRG_RSCRATCH0 rfi /* Force context change */ +/* TLB error interrupts for 476 + */ +#ifdef CONFIG_PPC_47x + START_EXCEPTION(DataTLBError47x) + mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1,r11 + mtspr SPRN_SPRG_WSCRATCH2,r12 + mtspr SPRN_SPRG_WSCRATCH3,r13 + mfcr r11 + mtspr SPRN_SPRG_WSCRATCH4,r11 + mfspr r10,SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11,PAGE_OFFSET@h + cmplw cr0,r10,r11 + blt+ 3f + lis r11,swapper_pg_dir@h + ori r11,r11, swapper_pg_dir@l + li r12,0 /* MMUCR = 0 */ + b 4f + + /* Get the PGD for the current thread and setup MMUCR */ +3: mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + mfspr r12,SPRN_PID /* Get PID */ +4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ + + /* Mask of required permission bits. Note that while we + * do copy ESR:ST to _PAGE_RW position as trying to write + * to an RO page is pretty common, we don't do it with + * _PAGE_DIRTY. We could do it, but it's a fairly rare + * event so I'd rather take the overhead when it happens + * rather than adding an instruction here. We should measure + * whether the whole thing is worth it in the first place + * as we could avoid loading SPRN_ESR completely in the first + * place... + * + * TODO: Is it worth doing that mfspr & rlwimi in the first + * place or can we save a couple of instructions here ? + */ + mfspr r12,SPRN_ESR + li r13,_PAGE_PRESENT|_PAGE_ACCESSED + rlwimi r13,r12,10,30,30 + + /* Load the PTE */ + /* Compute pgdir/pmd offset */ + rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29 + lwzx r11,r12,r11 /* Get pgd/pmd entry */ + + /* Word 0 is EPN,V,TS,DSIZ */ + li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE + rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/ + li r12,0 + tlbwe r10,r12,0 + + /* XXX can we do better ? Need to make sure tlbwe has established + * latch V bit in MMUCR0 before the PTE is loaded further down */ +#ifdef CONFIG_SMP + isync +#endif + + rlwinm. r12,r11,0,0,20 /* Extract pt base address */ + /* Compute pte address */ + rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28 + beq 2f /* Bail if no table */ + lwz r11,0(r12) /* Get high word of pte entry */ + + /* XXX can we do better ? maybe insert a known 0 bit from r11 into the + * bottom of r12 to create a data dependency... We can also use r10 + * as destination nowadays + */ +#ifdef CONFIG_SMP + lwsync +#endif + lwz r12,4(r12) /* Get low word of pte entry */ + + andc. r13,r13,r12 /* Check permission */ + + /* Jump to common tlb load */ + beq finish_tlb_load_47x + +2: /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11,SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13,SPRN_SPRG_RSCRATCH3 + mfspr r12,SPRN_SPRG_RSCRATCH2 + mfspr r11,SPRN_SPRG_RSCRATCH1 + mfspr r10,SPRN_SPRG_RSCRATCH0 + b DataStorage + + /* Instruction TLB Error Interrupt */ + /* + * Nearly the same as above, except we get our + * information from different registers and bailout + * to a different point. + */ + START_EXCEPTION(InstructionTLBError47x) + mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1,r11 + mtspr SPRN_SPRG_WSCRATCH2,r12 + mtspr SPRN_SPRG_WSCRATCH3,r13 + mfcr r11 + mtspr SPRN_SPRG_WSCRATCH4,r11 + mfspr r10,SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11,PAGE_OFFSET@h + cmplw cr0,r10,r11 + blt+ 3f + lis r11,swapper_pg_dir@h + ori r11,r11, swapper_pg_dir@l + li r12,0 /* MMUCR = 0 */ + b 4f + + /* Get the PGD for the current thread and setup MMUCR */ +3: mfspr r11,SPRN_SPRG_THREAD + lwz r11,PGDIR(r11) + mfspr r12,SPRN_PID /* Get PID */ +4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ + + /* Make up the required permissions */ + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC + + /* Load PTE */ + /* Compute pgdir/pmd offset */ + rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29 + lwzx r11,r12,r11 /* Get pgd/pmd entry */ + + /* Word 0 is EPN,V,TS,DSIZ */ + li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE + rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/ + li r12,0 + tlbwe r10,r12,0 + + /* XXX can we do better ? Need to make sure tlbwe has established + * latch V bit in MMUCR0 before the PTE is loaded further down */ +#ifdef CONFIG_SMP + isync +#endif + + rlwinm. r12,r11,0,0,20 /* Extract pt base address */ + /* Compute pte address */ + rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28 + beq 2f /* Bail if no table */ + + lwz r11,0(r12) /* Get high word of pte entry */ + /* XXX can we do better ? maybe insert a known 0 bit from r11 into the + * bottom of r12 to create a data dependency... We can also use r10 + * as destination nowadays + */ +#ifdef CONFIG_SMP + lwsync +#endif + lwz r12,4(r12) /* Get low word of pte entry */ + + andc. r13,r13,r12 /* Check permission */ + + /* Jump to common TLB load point */ + beq finish_tlb_load_47x + +2: /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 + b InstructionStorage + +/* + * Both the instruction and data TLB miss get to this + * point to load the TLB. + * r10 - free to use + * r11 - PTE high word value + * r12 - PTE low word value + * r13 - free to use + * MMUCR - loaded with proper value when we get here + * Upon exit, we reload everything and RFI. + */ +finish_tlb_load_47x: + /* Combine RPN & ERPN an write WS 1 */ + rlwimi r11,r12,0,0,31-PAGE_SHIFT + tlbwe r11,r13,1 + + /* And make up word 2 */ + li r10,0xf85 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + and r11,r12,r10 /* Mask PTE bits to keep */ + andi. r10,r12,_PAGE_USER /* User page ? */ + beq 1f /* nope, leave U bits empty */ + rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +1: tlbwe r11,r13,2 + + /* Done...restore registers and get out of here. + */ + mfspr r11, SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 + rfi + +#endif /* CONFIG_PPC_47x */ + + /* Debug Interrupt */ + /* + * This statement needs to exist at the end of the IVPR + * definition just in case you end up taking a debug + * exception within another exception. + */ + DEBUG_CRIT_EXCEPTION + /* * Global functions */ @@ -491,9 +705,18 @@ _GLOBAL(set_context) /* * Init CPU state. This is called at boot time or for secondary CPUs * to setup initial TLB entries, setup IVORs, etc... + * */ _GLOBAL(init_cpu_state) mflr r22 +#ifdef CONFIG_PPC_47x + /* We use the PVR to differenciate 44x cores from 476 */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmplwi cr0,r3,PVR_476@h + beq head_start_47x +#endif /* CONFIG_PPC_47x */ + /* * In case the firmware didn't do it, we apply some workarounds * that are good for all 440 core variants here @@ -506,7 +729,7 @@ _GLOBAL(init_cpu_state) sync /* - * Set up the initial MMU state + * Set up the initial MMU state for 44x * * We are still executing code at the virtual address * mappings set by the firmware for the base of RAM. @@ -646,16 +869,257 @@ skpinv: addi r4,r4,1 /* Increment */ SET_IVOR(10, Decrementer); SET_IVOR(11, FixedIntervalTimer); SET_IVOR(12, WatchdogTimer); - SET_IVOR(13, DataTLBError); - SET_IVOR(14, InstructionTLBError); + SET_IVOR(13, DataTLBError44x); + SET_IVOR(14, InstructionTLBError44x); SET_IVOR(15, DebugCrit); + b head_start_common + + +#ifdef CONFIG_PPC_47x + +#ifdef CONFIG_SMP + +/* Entry point for secondary 47x processors */ +_GLOBAL(start_secondary_47x) + mr r24,r3 /* CPU number */ + + bl init_cpu_state + + /* Now we need to bolt the rest of kernel memory which + * is done in C code. We must be careful because our task + * struct or our stack can (and will probably) be out + * of reach of the initial 256M TLB entry, so we use a + * small temporary stack in .bss for that. This works + * because only one CPU at a time can be in this code + */ + lis r1,temp_boot_stack@h + ori r1,r1,temp_boot_stack@l + addi r1,r1,1024-STACK_FRAME_OVERHEAD + li r0,0 + stw r0,0(r1) + bl mmu_init_secondary + + /* Now we can get our task struct and real stack pointer */ + + /* Get current_thread_info and current */ + lis r1,secondary_ti@ha + lwz r1,secondary_ti@l(r1) + lwz r2,TI_TASK(r1) + + /* Current stack pointer */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r0,0 + stw r0,0(r1) + + /* Kernel stack for exception entry in SPRG3 */ + addi r4,r2,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r4 + + b start_secondary + +#endif /* CONFIG_SMP */ + +/* + * Set up the initial MMU state for 44x + * + * We are still executing code at the virtual address + * mappings set by the firmware for the base of RAM. + */ + +head_start_47x: + /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ + mfspr r3,SPRN_PID /* Get PID */ + mfmsr r4 /* Get MSR */ + andi. r4,r4,MSR_IS@l /* TS=1? */ + beq 1f /* If not, leave STS=0 */ + oris r3,r3,PPC47x_MMUCR_STS@h /* Set STS=1 */ +1: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ + sync + + /* Find the entry we are running from */ + bl 1f +1: mflr r23 + tlbsx r23,0,r23 + tlbre r24,r23,0 + tlbre r25,r23,1 + tlbre r26,r23,2 + +/* + * Cleanup time + */ + + /* Initialize MMUCR */ + li r5,0 + mtspr SPRN_MMUCR,r5 + sync + +clear_all_utlb_entries: + + #; Set initial values. + + addis r3,0,0x8000 + addi r4,0,0 + addi r5,0,0 + b clear_utlb_entry + + #; Align the loop to speed things up. + + .align 6 + +clear_utlb_entry: + + tlbwe r4,r3,0 + tlbwe r5,r3,1 + tlbwe r5,r3,2 + addis r3,r3,0x2000 + cmpwi r3,0 + bne clear_utlb_entry + addis r3,0,0x8000 + addis r4,r4,0x100 + cmpwi r4,0 + bne clear_utlb_entry + + #; Restore original entry. + + oris r23,r23,0x8000 /* specify the way */ + tlbwe r24,r23,0 + tlbwe r25,r23,1 + tlbwe r26,r23,2 + +/* + * Configure and load pinned entry into TLB for the kernel core + */ + + lis r3,PAGE_OFFSET@h + ori r3,r3,PAGE_OFFSET@l + + /* Kernel is at the base of RAM */ + li r4, 0 /* Load the kernel physical address */ + + /* Load the kernel PID = 0 */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Word 0 */ + clrrwi r3,r3,12 /* Mask off the effective page number */ + ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_256M + + /* Word 1 */ + clrrwi r4,r4,12 /* Mask off the real page number */ + /* ERPN is 0 for first 4GB page */ + /* Word 2 */ + li r5,0 + ori r5,r5,PPC47x_TLB2_S_RWX +#ifdef CONFIG_SMP + ori r5,r5,PPC47x_TLB2_M +#endif + + /* We write to way 0 and bolted 0 */ + lis r0,0x8800 + tlbwe r3,r0,0 + tlbwe r4,r0,1 + tlbwe r5,r0,2 + +/* + * Configure SSPCR, ISPCR and USPCR for now to search everything, we can fix + * them up later + */ + LOAD_REG_IMMEDIATE(r3, 0x9abcdef0) + mtspr SPRN_SSPCR,r3 + mtspr SPRN_USPCR,r3 + LOAD_REG_IMMEDIATE(r3, 0x12345670) + mtspr SPRN_ISPCR,r3 + + /* Force context change */ + mfmsr r0 + mtspr SPRN_SRR1, r0 + lis r0,3f@h + ori r0,r0,3f@l + mtspr SPRN_SRR0,r0 + sync + rfi + + /* Invalidate original entry we used */ +3: + rlwinm r24,r24,0,21,19 /* clear the "valid" bit */ + tlbwe r24,r23,0 + addi r24,0,0 + tlbwe r24,r23,1 + tlbwe r24,r23,2 + isync /* Clear out the shadow TLB entries */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_44x + /* Add UART mapping for early debug. */ + + /* Word 0 */ + lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h + ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_TS | PPC47x_TLB0_1M + + /* Word 1 */ + lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h + ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH + + /* Word 2 */ + li r5,(PPC47x_TLB2_S_RW | PPC47x_TLB2_IMG) + + /* Bolted in way 0, bolt slot 5, we -hope- we don't hit the same + * congruence class as the kernel, we need to make sure of it at + * some point + */ + lis r0,0x8d00 + tlbwe r3,r0,0 + tlbwe r4,r0,1 + tlbwe r5,r0,2 + + /* Force context change */ + isync +#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ + + /* Establish the interrupt vector offsets */ + SET_IVOR(0, CriticalInput); + SET_IVOR(1, MachineCheckA); + SET_IVOR(2, DataStorage); + SET_IVOR(3, InstructionStorage); + SET_IVOR(4, ExternalInput); + SET_IVOR(5, Alignment); + SET_IVOR(6, Program); + SET_IVOR(7, FloatingPointUnavailable); + SET_IVOR(8, SystemCall); + SET_IVOR(9, AuxillaryProcessorUnavailable); + SET_IVOR(10, Decrementer); + SET_IVOR(11, FixedIntervalTimer); + SET_IVOR(12, WatchdogTimer); + SET_IVOR(13, DataTLBError47x); + SET_IVOR(14, InstructionTLBError47x); + SET_IVOR(15, DebugCrit); + + /* We configure icbi to invalidate 128 bytes at a time since the + * current 32-bit kernel code isn't too happy with icache != dcache + * block size + */ + mfspr r3,SPRN_CCR0 + oris r3,r3,0x0020 + mtspr SPRN_CCR0,r3 + isync + +#endif /* CONFIG_PPC_47x */ + +/* + * Here we are back to code that is common between 44x and 47x + * + * We proceed to further kernel initialization and return to the + * main kernel entry + */ +head_start_common: /* Establish the interrupt vector base */ lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ mtspr SPRN_IVPR,r4 addis r22,r22,KERNELBASE@h mtlr r22 + isync blr /* @@ -683,3 +1147,9 @@ swapper_pg_dir: */ abatron_pteptrs: .space 8 + +#ifdef CONFIG_SMP + .align 12 +temp_boot_stack: + .space 1024 +#endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 8649f536f8df..8043d1b73cf0 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -441,7 +441,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) addi r3,r3,L1_CACHE_BYTES bdnz 0b sync -#ifndef CONFIG_44x +#ifdef CONFIG_44x /* We don't flush the icache on 44x. Those have a virtual icache * and we don't have access to the virtual address here (it's * not the page vaddr but where it's mapped in user space). The @@ -449,15 +449,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) * a change in the address space occurs, before returning to * user space */ +BEGIN_MMU_FTR_SECTION + blr +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x) +#endif /* CONFIG_44x */ mtctr r4 1: icbi 0,r6 addi r6,r6,L1_CACHE_BYTES bdnz 1b sync isync -#endif /* CONFIG_44x */ blr +#ifndef CONFIG_BOOKE /* * Flush a particular page from the data cache to RAM, identified * by its physical address. We turn off the MMU so we can just use @@ -490,6 +494,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mtmsr r10 /* restore DR */ isync blr +#endif /* CONFIG_BOOKE */ /* * Clear pages using the dcbz instruction, which doesn't cause any diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e36f94f7411a..3fe4de2b685e 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -495,6 +495,14 @@ int __devinit start_secondary(void *unused) current->active_mm = &init_mm; smp_store_cpu_info(cpu); + +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + /* Clear any pending timer interrupts */ + mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); + + /* Enable decrementer interrupt */ + mtspr(SPRN_TCR, TCR_DIE); +#endif set_dec(tb_ticks_per_jiffy); preempt_disable(); cpu_callin_map[cpu] = 1; -- cgit v1.2.2 From fc5e709731429bc2db27897630e7c0089f297680 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 5 Mar 2010 03:43:18 +0000 Subject: powerpc/476: add machine check handler for 47x core The 47x core's MCSR varies from 44x, so it needs it's own machine check handler. Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cputable.c | 1 + arch/powerpc/kernel/traps.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index a1d845839727..ad6baf834a76 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1712,6 +1712,7 @@ static struct cpu_spec __initdata cpu_specs[] = { MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, .icache_bsize = 32, .dcache_bsize = 128, + .machine_check = machine_check_47x, .platform = "ppc470", }, { /* default match */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 29d128eb6c43..ca7ce85ebc2e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -380,6 +380,46 @@ int machine_check_440A(struct pt_regs *regs) } return 0; } + +int machine_check_47x(struct pt_regs *regs) +{ + unsigned long reason = get_mc_reason(regs); + u32 mcsr; + + printk(KERN_ERR "Machine check in kernel mode.\n"); + if (reason & ESR_IMCP) { + printk(KERN_ERR + "Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + return 0; + } + mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk(KERN_ERR "Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk(KERN_ERR "Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk(KERN_ERR "Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk(KERN_ERR "TLB Parity Error\n"); + if (mcsr & MCSR_ICP) { + flush_instruction_cache(); + printk(KERN_ERR "I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk(KERN_ERR "D-Cache Search Parity Error\n"); + if (mcsr & PPC47x_MCSR_GPR) + printk(KERN_ERR "GPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_FPR) + printk(KERN_ERR "FPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_IPR) + printk(KERN_ERR "Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + + return 0; +} #elif defined(CONFIG_E500) int machine_check_e500(struct pt_regs *regs) { -- cgit v1.2.2 From 221c185d4e11b4061409da5d592779ced484614c Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 5 Mar 2010 10:43:24 +0000 Subject: powerpc/476: Add isync after loading mmu and debug spr's 476 requires an isync after loading MMU and debug related SPR's. Some of these are in performance-critical paths and may need to be optimized, but initially, we're playing it safe. Signed-off-by: Torez Smith Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/kprobes.c | 3 +++ arch/powerpc/kernel/process.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index b36f074524ad..c533525ca56a 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -114,6 +114,9 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) #ifdef CONFIG_PPC_ADV_DEBUG_REGS regs->msr &= ~MSR_CE; mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); +#ifdef CONFIG_PPC_47x + isync(); +#endif #endif /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e4d71ced97ef..9d255b4f0a0e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -371,6 +371,9 @@ int set_dabr(unsigned long dabr) /* XXX should we have a CPU_FTR_HAS_DABR ? */ #ifdef CONFIG_PPC_ADV_DEBUG_REGS mtspr(SPRN_DAC1, dabr); +#ifdef CONFIG_PPC_47x + isync(); +#endif #elif defined(CONFIG_PPC_BOOK3S) mtspr(SPRN_DABR, dabr); #endif -- cgit v1.2.2 From b4e8c8dd8456c1d3685fb5b715c9795d250f500e Mon Sep 17 00:00:00 2001 From: Torez Smith Date: Fri, 5 Mar 2010 10:45:54 +0000 Subject: powerpc/4xx: Simple platform for the ISS 4xx simulator This is a trivial 4xx plaform that uses the new simple bsp from Josh and is handy to use in simulators such as ISS or even Mambo who don't properly implement most of the actual devices in the SoC but really only the core. Signed-off-by: Torez Smith Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cputable.c | 15 +++++++++++++++ arch/powerpc/kernel/head_44x.S | 2 ++ 2 files changed, 17 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index ad6baf834a76..9556be903e96 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1715,6 +1715,21 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, + { /* 476 iss */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00050000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 1acd175428c4..5ab484ef06a7 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -715,6 +715,8 @@ _GLOBAL(init_cpu_state) srwi r3,r3,16 cmplwi cr0,r3,PVR_476@h beq head_start_47x + cmplwi cr0,r3,PVR_476_ISS@h + beq head_start_47x #endif /* CONFIG_PPC_47x */ /* -- cgit v1.2.2 From d5f86fe3457f48f27eecd40c605e7876d026af7c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:33 +0000 Subject: powerpc/cpumask: Convert rtasd to new cpumask API Use cpumask_first, cpumask_next in rtasd code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtasd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 4190eae7850a..e907ca66f75a 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -411,9 +411,9 @@ static void rtas_event_scan(struct work_struct *w) get_online_cpus(); - cpu = next_cpu(smp_processor_id(), cpu_online_map); - if (cpu == NR_CPUS) { - cpu = first_cpu(cpu_online_map); + cpu = cpumask_next(smp_processor_id(), cpu_online_mask); + if (cpu >= nr_cpu_ids) { + cpu = cpumask_first(cpu_online_mask); if (first_pass) { first_pass = 0; @@ -466,8 +466,8 @@ static void start_event_scan(void) /* Retreive errors from nvram if any */ retreive_nvram_error_log(); - schedule_delayed_work_on(first_cpu(cpu_online_map), &event_scan_work, - event_scan_delay); + schedule_delayed_work_on(cpumask_first(cpu_online_mask), + &event_scan_work, event_scan_delay); } static int __init rtas_init(void) -- cgit v1.2.2 From bfb9126defa80cbed6d91ed9685b238b0d7e81c4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:34 +0000 Subject: powerpc/cpumask: Convert smp_cpus_done to new cpumask API Use the new cpumask_* functions and dynamically allocate the cpumask in smp_cpus_done. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3fe4de2b685e..17523a0abf66 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -562,19 +562,22 @@ int setup_profiling_timer(unsigned int multiplier) void __init smp_cpus_done(unsigned int max_cpus) { - cpumask_t old_mask; + cpumask_var_t old_mask; /* We want the setup_cpu() here to be called from CPU 0, but our * init thread may have been "borrowed" by another CPU in the meantime * se we pin us down to CPU 0 for a short while */ - old_mask = current->cpus_allowed; + alloc_cpumask_var(&old_mask, GFP_NOWAIT); + cpumask_copy(old_mask, ¤t->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) smp_ops->setup_cpu(boot_cpuid); - set_cpus_allowed_ptr(current, &old_mask); + set_cpus_allowed_ptr(current, old_mask); + + free_cpumask_var(old_mask); snapshot_timebases(); -- cgit v1.2.2 From b6decb707952c678d110699abb5ed86d45ca6927 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:35 +0000 Subject: powerpc/cpumask: Convert fixup_irqs to new cpumask API Use new cpumask_* functions, and dynamically allocate cpumask in fixup_irqs. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 17 ++++++++++------- arch/powerpc/kernel/smp.c | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 64f6f2031c22..250ee2ebf288 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -290,30 +290,33 @@ u64 arch_irq_stat_cpu(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(cpumask_t map) +void fixup_irqs(const struct cpumask *map) { struct irq_desc *desc; unsigned int irq; static int warned; + cpumask_var_t mask; - for_each_irq(irq) { - cpumask_t mask; + alloc_cpumask_var(&mask, GFP_KERNEL); + for_each_irq(irq) { desc = irq_to_desc(irq); if (desc && desc->status & IRQ_PER_CPU) continue; - cpumask_and(&mask, desc->affinity, &map); - if (any_online_cpu(mask) == NR_CPUS) { + cpumask_and(mask, desc->affinity, map); + if (cpumask_any(mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); - mask = map; + cpumask_copy(mask, map); } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, &mask); + desc->chip->set_affinity(irq, mask); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } + free_cpumask_var(mask); + local_irq_enable(); mdelay(1); local_irq_disable(); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 17523a0abf66..62e82c25c583 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -313,7 +313,7 @@ int generic_cpu_disable(void) set_cpu_online(cpu, false); #ifdef CONFIG_PPC64 vdso_data->processorCount--; - fixup_irqs(cpu_online_map); + fixup_irqs(cpu_online_mask); #endif return 0; } @@ -333,7 +333,7 @@ int generic_cpu_enable(unsigned int cpu) cpu_relax(); #ifdef CONFIG_PPC64 - fixup_irqs(cpu_online_map); + fixup_irqs(cpu_online_mask); /* counter the irq disable in fixup_irqs */ local_irq_enable(); #endif -- cgit v1.2.2 From 2c2df038450cbf628426183c9efffc17cfea3406 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:39 +0000 Subject: powerpc/cpumask: Refactor /proc/cpuinfo code This separates the per cpu output from the summary output at the end of the file, making it easier to convert to the new cpumask API in a subsequent patch. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup-common.c | 62 ++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 48f0a008b20b..58699a43eaa2 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -161,6 +161,38 @@ extern u32 cpu_temp_both(unsigned long cpu); DEFINE_PER_CPU(unsigned int, cpu_pvr); #endif +static void show_cpuinfo_summary(struct seq_file *m) +{ + struct device_node *root; + const char *model = NULL; +#if defined(CONFIG_SMP) && defined(CONFIG_PPC32) + unsigned long bogosum = 0; + int i; + for_each_online_cpu(i) + bogosum += loops_per_jiffy; + seq_printf(m, "total bogomips\t: %lu.%02lu\n", + bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); +#endif /* CONFIG_SMP && CONFIG_PPC32 */ + seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); + if (ppc_md.name) + seq_printf(m, "platform\t: %s\n", ppc_md.name); + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + if (model) + seq_printf(m, "model\t\t: %s\n", model); + of_node_put(root); + + if (ppc_md.show_cpuinfo != NULL) + ppc_md.show_cpuinfo(m); + +#ifdef CONFIG_PPC32 + /* Display the amount of memory */ + seq_printf(m, "Memory\t\t: %d MB\n", + (unsigned int)(total_memory / (1024 * 1024))); +#endif +} + static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long cpu_id = (unsigned long)v - 1; @@ -169,35 +201,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short min; if (cpu_id == NR_CPUS) { - struct device_node *root; - const char *model = NULL; -#if defined(CONFIG_SMP) && defined(CONFIG_PPC32) - unsigned long bogosum = 0; - int i; - for_each_online_cpu(i) - bogosum += loops_per_jiffy; - seq_printf(m, "total bogomips\t: %lu.%02lu\n", - bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); -#endif /* CONFIG_SMP && CONFIG_PPC32 */ - seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); - if (ppc_md.name) - seq_printf(m, "platform\t: %s\n", ppc_md.name); - root = of_find_node_by_path("/"); - if (root) - model = of_get_property(root, "model", NULL); - if (model) - seq_printf(m, "model\t\t: %s\n", model); - of_node_put(root); - - if (ppc_md.show_cpuinfo != NULL) - ppc_md.show_cpuinfo(m); - -#ifdef CONFIG_PPC32 - /* Display the amount of memory */ - seq_printf(m, "Memory\t\t: %d MB\n", - (unsigned int)(total_memory / (1024 * 1024))); -#endif - + show_cpuinfo_summary(m); return 0; } -- cgit v1.2.2 From e6532c63cc3dbefc79936fc9c9c68a151004fe46 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:40 +0000 Subject: powerpc/cpumask: Convert /proc/cpuinfo to new cpumask API Use new cpumask API in /proc/cpuinfo code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup-common.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 58699a43eaa2..8dcec47a7b39 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -200,11 +200,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short maj; unsigned short min; - if (cpu_id == NR_CPUS) { - show_cpuinfo_summary(m); - return 0; - } - /* We only show online cpus: disable preempt (overzealous, I * knew) to prevent cpu going down. */ preempt_disable(); @@ -312,19 +307,28 @@ static int show_cpuinfo(struct seq_file *m, void *v) #endif preempt_enable(); + + /* If this is the last cpu, print the summary */ + if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) + show_cpuinfo_summary(m); + return 0; } static void *c_start(struct seq_file *m, loff_t *pos) { - unsigned long i = *pos; - - return i <= NR_CPUS ? (void *)(i + 1) : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = cpumask_first(cpu_online_mask); + else + *pos = cpumask_next(*pos - 1, cpu_online_mask); + if ((*pos) < nr_cpu_ids) + return (void *)(unsigned long)(*pos + 1); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + (*pos)++; return c_start(m, pos); } -- cgit v1.2.2 From cc1ba8ea6dde3f049b2b365d8fdc13976aee25cb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:41 +0000 Subject: powerpc/cpumask: Dynamically allocate cpu_sibling_map and cpu_core_map cpumasks Dynamically allocate cpu_sibling_map and cpu_core_map cpumasks. We don't need to set_cpu_online() the boot cpu in smp_prepare_boot_cpu, init/main.c does it for us. We also postpone setting of the boot cpu in cpu_sibling_map and cpu_core_map until when the memory allocator is available (smp_prepare_cpus), similar to x86. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 62e82c25c583..39babb1e2ce1 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -59,8 +59,8 @@ struct thread_info *secondary_ti; -DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; -DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE; +DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); @@ -271,6 +271,16 @@ void __init smp_prepare_cpus(unsigned int max_cpus) smp_store_cpu_info(boot_cpuid); cpu_callin_map[boot_cpuid] = 1; + for_each_possible_cpu(cpu) { + zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + } + + cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); + cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); + if (smp_ops) if (smp_ops->probe) max_cpus = smp_ops->probe(); @@ -289,10 +299,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) void __devinit smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != boot_cpuid); - - set_cpu_online(boot_cpuid, true); - cpu_set(boot_cpuid, per_cpu(cpu_sibling_map, boot_cpuid)); - cpu_set(boot_cpuid, per_cpu(cpu_core_map, boot_cpuid)); #ifdef CONFIG_PPC64 paca[boot_cpuid].__current = current; #endif @@ -525,15 +531,15 @@ int __devinit start_secondary(void *unused) for (i = 0; i < threads_per_core; i++) { if (cpu_is_offline(base + i)) continue; - cpu_set(cpu, per_cpu(cpu_sibling_map, base + i)); - cpu_set(base + i, per_cpu(cpu_sibling_map, cpu)); + cpumask_set_cpu(cpu, cpu_sibling_mask(base + i)); + cpumask_set_cpu(base + i, cpu_sibling_mask(cpu)); /* cpu_core_map should be a superset of * cpu_sibling_map even if we don't have cache * information, so update the former here, too. */ - cpu_set(cpu, per_cpu(cpu_core_map, base +i)); - cpu_set(base + i, per_cpu(cpu_core_map, cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(base + i)); + cpumask_set_cpu(base + i, cpu_core_mask(cpu)); } l2_cache = cpu_to_l2cache(cpu); for_each_online_cpu(i) { @@ -541,8 +547,8 @@ int __devinit start_secondary(void *unused) if (!np) continue; if (np == l2_cache) { - cpu_set(cpu, per_cpu(cpu_core_map, i)); - cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(i)); + cpumask_set_cpu(i, cpu_core_mask(cpu)); } of_node_put(np); } @@ -602,10 +608,10 @@ int __cpu_disable(void) /* Update sibling maps */ base = cpu_first_thread_in_core(cpu); for (i = 0; i < threads_per_core; i++) { - cpu_clear(cpu, per_cpu(cpu_sibling_map, base + i)); - cpu_clear(base + i, per_cpu(cpu_sibling_map, cpu)); - cpu_clear(cpu, per_cpu(cpu_core_map, base +i)); - cpu_clear(base + i, per_cpu(cpu_core_map, cpu)); + cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i)); + cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu)); + cpumask_clear_cpu(cpu, cpu_core_mask(base + i)); + cpumask_clear_cpu(base + i, cpu_core_mask(cpu)); } l2_cache = cpu_to_l2cache(cpu); @@ -614,8 +620,8 @@ int __cpu_disable(void) if (!np) continue; if (np == l2_cache) { - cpu_clear(cpu, per_cpu(cpu_core_map, i)); - cpu_clear(i, per_cpu(cpu_core_map, cpu)); + cpumask_clear_cpu(cpu, cpu_core_mask(i)); + cpumask_clear_cpu(i, cpu_core_mask(cpu)); } of_node_put(np); } -- cgit v1.2.2 From 828a69869ba266cabb486a6b59ea8643d56b33ce Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:44 +0000 Subject: powerpc/cpumask: Update some comments Since the *_map cpumask variants are deprecated, change the comments to instead refer to *_mask. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup-common.c | 6 +++--- arch/powerpc/kernel/smp.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 8dcec47a7b39..5e4d852f640c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -394,14 +394,14 @@ static void __init cpu_init_thread_core_maps(int tpc) /** * setup_cpu_maps - initialize the following cpu maps: - * cpu_possible_map - * cpu_present_map + * cpu_possible_mask + * cpu_present_mask * * Having the possible map set up early allows us to restrict allocations * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. * * We do not initialize the online map here; cpus set their own bits in - * cpu_online_map as they come up. + * cpu_online_mask as they come up. * * This function is valid only for Open Firmware systems. finish_device_tree * must be called before using this. diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 39babb1e2ce1..bf366167d369 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -468,7 +468,7 @@ out: return id; } -/* Must be called when no change can occur to cpu_present_map, +/* Must be called when no change can occur to cpu_present_mask, * i.e. during cpu online or offline. */ static struct device_node *cpu_to_l2cache(int cpu) -- cgit v1.2.2 From 8e6d5573af55435160d329f6ae3fe16a0abbdaec Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Sat, 8 May 2010 20:28:41 +1000 Subject: perf, powerpc: Implement group scheduling transactional APIs [paulus@samba.org: Set cpuhw->event[i]->hw.config in power_pmu_commit_txn.] Signed-off-by: Lin Ming Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra LKML-Reference: <20100508102841.GA10650@brick.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 129 +++++++++++++++++++++------------------ 1 file changed, 68 insertions(+), 61 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 08460a2e9f41..43b83c35cf54 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -35,6 +35,9 @@ struct cpu_hw_events { u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; + + unsigned int group_flag; + int n_txn_start; }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); @@ -718,66 +721,6 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -static void event_sched_in(struct perf_event *event) -{ - event->state = PERF_EVENT_STATE_ACTIVE; - event->oncpu = smp_processor_id(); - event->tstamp_running += event->ctx->time - event->tstamp_stopped; - if (is_software_event(event)) - event->pmu->enable(event); -} - -/* - * Called to enable a whole group of events. - * Returns 1 if the group was enabled, or -EAGAIN if it could not be. - * Assumes the caller has disabled interrupts and has - * frozen the PMU with hw_perf_save_disable. - */ -int hw_perf_group_sched_in(struct perf_event *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - struct cpu_hw_events *cpuhw; - long i, n, n0; - struct perf_event *sub; - - if (!ppmu) - return 0; - cpuhw = &__get_cpu_var(cpu_hw_events); - n0 = cpuhw->n_events; - n = collect_events(group_leader, ppmu->n_counter - n0, - &cpuhw->event[n0], &cpuhw->events[n0], - &cpuhw->flags[n0]); - if (n < 0) - return -EAGAIN; - if (check_excludes(cpuhw->event, cpuhw->flags, n0, n)) - return -EAGAIN; - i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); - if (i < 0) - return -EAGAIN; - cpuhw->n_events = n0 + n; - cpuhw->n_added += n; - - /* - * OK, this group can go on; update event states etc., - * and enable any software events - */ - for (i = n0; i < n0 + n; ++i) - cpuhw->event[i]->hw.config = cpuhw->events[i]; - cpuctx->active_oncpu += n; - n = 1; - event_sched_in(group_leader); - list_for_each_entry(sub, &group_leader->sibling_list, group_entry) { - if (sub->state != PERF_EVENT_STATE_OFF) { - event_sched_in(sub); - ++n; - } - } - ctx->nr_active += n; - - return 1; -} - /* * Add a event to the PMU. * If all events are not already frozen, then we disable and @@ -805,12 +748,22 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->event[n0] = event; cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + + /* + * If group events scheduling transaction was started, + * skip the schedulability test here, it will be peformed + * at commit time(->commit_txn) as a whole + */ + if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED) + goto nocheck; + if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) goto out; if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) goto out; - event->hw.config = cpuhw->events[n0]; + +nocheck: ++cpuhw->n_events; ++cpuhw->n_added; @@ -896,11 +849,65 @@ static void power_pmu_unthrottle(struct perf_event *event) local_irq_restore(flags); } +/* + * Start group events scheduling transaction + * Set the flag to make pmu::enable() not perform the + * schedulability test, it will be performed at commit time + */ +void power_pmu_start_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; + cpuhw->n_txn_start = cpuhw->n_events; +} + +/* + * Stop group events scheduling transaction + * Clear the flag and pmu::enable() will perform the + * schedulability test. + */ +void power_pmu_cancel_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; +} + +/* + * Commit group events scheduling transaction + * Perform the group schedulability test as a whole + * Return 0 if success + */ +int power_pmu_commit_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw; + long i, n; + + if (!ppmu) + return -EAGAIN; + cpuhw = &__get_cpu_var(cpu_hw_events); + n = cpuhw->n_events; + if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) + return -EAGAIN; + i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n); + if (i < 0) + return -EAGAIN; + + for (i = cpuhw->n_txn_start; i < n; ++i) + cpuhw->event[i]->hw.config = cpuhw->events[i]; + + return 0; +} + struct pmu power_pmu = { .enable = power_pmu_enable, .disable = power_pmu_disable, .read = power_pmu_read, .unthrottle = power_pmu_unthrottle, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, }; /* -- cgit v1.2.2 From 0fe1ac48bef018bed896307cd12f6ca9b5e704ab Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 13 Apr 2010 20:46:04 +0000 Subject: powerpc/perf_event: Fix oops due to perf_event_do_pending call Anton Blanchard found that large POWER systems would occasionally crash in the exception exit path when profiling with perf_events. The symptom was that an interrupt would occur late in the exit path when the MSR[RI] (recoverable interrupt) bit was clear. Interrupts should be hard-disabled at this point but they were enabled. Because the interrupt was not recoverable the system panicked. The reason is that the exception exit path was calling perf_event_do_pending after hard-disabling interrupts, and perf_event_do_pending will re-enable interrupts. The simplest and cleanest fix for this is to use the same mechanism that 32-bit powerpc does, namely to cause a self-IPI by setting the decrementer to 1. This means we can remove the tests in the exception exit path and raw_local_irq_restore. This also makes sure that the call to perf_event_do_pending from timer_interrupt() happens within irq_enter/irq_exit. (Note that calling perf_event_do_pending from timer_interrupt does not mean that there is a possible 1/HZ latency; setting the decrementer to 1 ensures that the timer interrupt will happen immediately, i.e. within one timebase tick, which is a few nanoseconds or 10s of nanoseconds.) Signed-off-by: Paul Mackerras Cc: stable@kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 1 - arch/powerpc/kernel/entry_64.S | 9 ------ arch/powerpc/kernel/irq.c | 6 ---- arch/powerpc/kernel/time.c | 60 +++++++++++++++++++++++++++++++-------- 4 files changed, 48 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 957ceb7059c5..c09138d150d4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -133,7 +133,6 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); - DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 07109d843787..42e9d908914a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); -#ifdef CONFIG_PERF_EVENTS - /* check paca->perf_event_pending if we're enabling ints */ - lbz r3,PACAPERFPEND(r13) - and. r3,r3,r5 - beq 27f - bl .perf_event_do_pending -27: -#endif /* CONFIG_PERF_EVENTS */ - /* extract EE bit and use it to restore paca->hard_enabled */ ld r3,_MSR(r1) rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 64f6f2031c22..066bd31551d5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include @@ -145,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en) } #endif /* CONFIG_PPC_STD_MMU_64 */ - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); - } - /* * if (get_paca()->hard_enabled) return; * But again we need to take care that gcc gets hard_enabled directly diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1b16b9a3e49a..0441bbdadbd1 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) -DEFINE_PER_CPU(u8, perf_event_pending); +#ifdef CONFIG_PERF_EVENTS -void set_perf_event_pending(void) +/* + * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... + */ +#ifdef CONFIG_PPC64 +static inline unsigned long test_perf_event_pending(void) { - get_cpu_var(perf_event_pending) = 1; - set_dec(1); - put_cpu_var(perf_event_pending); + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, perf_event_pending))); + return x; } +static inline void set_perf_event_pending_flag(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (1), + "i" (offsetof(struct paca_struct, perf_event_pending))); +} + +static inline void clear_perf_event_pending(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (0), + "i" (offsetof(struct paca_struct, perf_event_pending))); +} + +#else /* 32-bit */ + +DEFINE_PER_CPU(u8, perf_event_pending); + +#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 #define test_perf_event_pending() __get_cpu_var(perf_event_pending) #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 -#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ +#endif /* 32 vs 64 bit */ + +void set_perf_event_pending(void) +{ + preempt_disable(); + set_perf_event_pending_flag(); + set_dec(1); + preempt_enable(); +} + +#else /* CONFIG_PERF_EVENTS */ #define test_perf_event_pending() 0 #define clear_perf_event_pending() -#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ +#endif /* CONFIG_PERF_EVENTS */ /* * For iSeries shared processors, we have to let the hypervisor @@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs) set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); - } if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif @@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs) calculate_steal_time(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); + } + #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES)) get_lppaca()->int_dword.fields.decr_int = 0; -- cgit v1.2.2 From 306d071f179c28c97688cb91c8585fd5ab840a9e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 1 Apr 2010 15:33:22 +0200 Subject: KVM: PPC: Don't export Book3S symbols on BookE Book3S knows how to convert floats to doubles and vice versa. BookE doesn't. So let's make sure we don't export them on BookE. This fixes a link error on BookE with CONFIG_KVM=y. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/ppc_ksyms.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 58fdb3a784de..bc9f39d2598b 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -101,8 +101,10 @@ EXPORT_SYMBOL(pci_dram_offset); EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(kernel_thread); +#ifndef CONFIG_BOOKE EXPORT_SYMBOL_GPL(cvt_df); EXPORT_SYMBOL_GPL(cvt_fd); +#endif EXPORT_SYMBOL(giveup_fpu); #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); -- cgit v1.2.2 From 2191d657c9eaa4c444c33e014199ed9de1ac339d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 16 Apr 2010 00:11:32 +0200 Subject: KVM: PPC: Name generic 64-bit code generic We have quite some code that can be used by Book3S_32 and Book3S_64 alike, so let's call it "Book3S" instead of "Book3S_64", so we can later on use it from the 32 bit port too. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/head_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index bed9a29ee383..844a44b64472 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -37,7 +37,7 @@ #include #include #include -#include +#include /* The physical memory is layed out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -169,7 +169,7 @@ exception_marker: /* KVM trampoline code needs to be close to the interrupt handlers */ #ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#include "../kvm/book3s_64_rmhandlers.S" +#include "../kvm/book3s_rmhandlers.S" #endif _GLOBAL(generic_secondary_thread_init) -- cgit v1.2.2 From 00c3a37ca332f54f2187720e51f7c0e18e91d7c9 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 16 Apr 2010 00:11:42 +0200 Subject: KVM: PPC: Use CONFIG_PPC_BOOK3S define Upstream recently added a new name for PPC64: Book3S_64. So instead of using CONFIG_PPC64 we should use CONFIG_PPC_BOOK3S consotently. That makes understanding the code easier (I hope). Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/asm-offsets.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 957ceb7059c5..57a8c49c8830 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -426,8 +426,8 @@ int main(void) DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); - /* book3s_64 */ -#ifdef CONFIG_PPC64 + /* book3s */ +#ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); @@ -442,7 +442,7 @@ int main(void) #else DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC_BOOK3S */ #endif #ifdef CONFIG_44x DEFINE(PGD_T_LOG2, PGD_T_LOG2); -- cgit v1.2.2 From 0604675fe17f68741730cebe74422605bb79d972 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 16 Apr 2010 00:11:44 +0200 Subject: KVM: PPC: Use now shadowed vcpu fields The shadow vcpu now contains some fields we don't use from the vcpu anymore. Access to them happens using inline functions that happily use the shadow vcpu fields. So let's now ifdef them out to booke only and add asm-offsets. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/asm-offsets.c | 91 +++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 37 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 57a8c49c8830..e8003ff81eef 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -50,6 +50,9 @@ #endif #ifdef CONFIG_KVM #include +#ifndef CONFIG_BOOKE +#include +#endif #endif #ifdef CONFIG_PPC32 @@ -191,33 +194,9 @@ int main(void) DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); #ifdef CONFIG_KVM_BOOK3S_64_HANDLER - DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); - DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); - DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); - DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr)); - DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer)); - DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0])); - DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1])); - DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2])); - DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3])); - DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4])); - DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5])); - DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6])); - DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7])); - DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8])); - DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9])); - DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10])); - DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11])); - DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12])); - DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13])); - DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1)); - DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2)); - DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct, - shadow_vcpu.vmhandler)); - DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct, - shadow_vcpu.scratch0)); - DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct, - shadow_vcpu.scratch1)); + DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); + DEFINE(SVCPU_SLB, offsetof(struct kvmppc_book3s_shadow_vcpu, slb)); + DEFINE(SVCPU_SLB_MAX, offsetof(struct kvmppc_book3s_shadow_vcpu, slb_max)); #endif #endif /* CONFIG_PPC64 */ @@ -412,9 +391,6 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); - DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); - DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); - DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); @@ -422,26 +398,67 @@ int main(void) DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); - DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); - DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); - DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); - /* book3s */ #ifdef CONFIG_PPC_BOOK3S - DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); - DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); - DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); + DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - + offsetof(struct kvmppc_vcpu_book3s, vcpu)); + DEFINE(SVCPU_CR, offsetof(struct kvmppc_book3s_shadow_vcpu, cr)); + DEFINE(SVCPU_XER, offsetof(struct kvmppc_book3s_shadow_vcpu, xer)); + DEFINE(SVCPU_CTR, offsetof(struct kvmppc_book3s_shadow_vcpu, ctr)); + DEFINE(SVCPU_LR, offsetof(struct kvmppc_book3s_shadow_vcpu, lr)); + DEFINE(SVCPU_PC, offsetof(struct kvmppc_book3s_shadow_vcpu, pc)); + DEFINE(SVCPU_R0, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[0])); + DEFINE(SVCPU_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[1])); + DEFINE(SVCPU_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[2])); + DEFINE(SVCPU_R3, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[3])); + DEFINE(SVCPU_R4, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[4])); + DEFINE(SVCPU_R5, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[5])); + DEFINE(SVCPU_R6, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[6])); + DEFINE(SVCPU_R7, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[7])); + DEFINE(SVCPU_R8, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[8])); + DEFINE(SVCPU_R9, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[9])); + DEFINE(SVCPU_R10, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[10])); + DEFINE(SVCPU_R11, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[11])); + DEFINE(SVCPU_R12, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[12])); + DEFINE(SVCPU_R13, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[13])); + DEFINE(SVCPU_HOST_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r1)); + DEFINE(SVCPU_HOST_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r2)); + DEFINE(SVCPU_VMHANDLER, offsetof(struct kvmppc_book3s_shadow_vcpu, + vmhandler)); + DEFINE(SVCPU_SCRATCH0, offsetof(struct kvmppc_book3s_shadow_vcpu, + scratch0)); + DEFINE(SVCPU_SCRATCH1, offsetof(struct kvmppc_book3s_shadow_vcpu, + scratch1)); + DEFINE(SVCPU_IN_GUEST, offsetof(struct kvmppc_book3s_shadow_vcpu, + in_guest)); + DEFINE(SVCPU_FAULT_DSISR, offsetof(struct kvmppc_book3s_shadow_vcpu, + fault_dsisr)); + DEFINE(SVCPU_FAULT_DAR, offsetof(struct kvmppc_book3s_shadow_vcpu, + fault_dar)); + DEFINE(SVCPU_LAST_INST, offsetof(struct kvmppc_book3s_shadow_vcpu, + last_inst)); + DEFINE(SVCPU_SHADOW_SRR1, offsetof(struct kvmppc_book3s_shadow_vcpu, + shadow_srr1)); +#ifdef CONFIG_PPC_BOOK3S_32 + DEFINE(SVCPU_SR, offsetof(struct kvmppc_book3s_shadow_vcpu, sr)); +#endif #else DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); + DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); + DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); + DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); + DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); #endif /* CONFIG_PPC_BOOK3S */ #endif #ifdef CONFIG_44x -- cgit v1.2.2 From 97e492558f423d99c51eb934506b7a3d7c64613b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 16 Apr 2010 00:11:51 +0200 Subject: KVM: PPC: Add SVCPU to Book3S_32 We need to keep the pointer to the shadow vcpu somewhere accessible from within really early interrupt code. The best fit I found was the thread struct, as that resides in an SPRG. So let's put a pointer to the shadow vcpu in the thread struct and add an asm-offset so we can find it. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/asm-offsets.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index e8003ff81eef..1804c2cd1dfd 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -108,6 +108,9 @@ int main(void) DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER + DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); +#endif DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); -- cgit v1.2.2 From be85669886776faa496b026b344691c325727731 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 16 Apr 2010 00:11:54 +0200 Subject: KVM: PPC: Export MMU variables Our shadow MMU code needs to know where the HTAB is located and how big it is. So we need some variables from the kernel exported to module space if KVM is built as a module. CC: Benjamin Herrenschmidt Signed-off-by: Alexander Graf Acked-by: Benjamin Herrenschmidt Signed-off-by: Avi Kivity --- arch/powerpc/kernel/ppc_ksyms.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index bc9f39d2598b..2b7c43f95bb2 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -178,6 +178,11 @@ EXPORT_SYMBOL(switch_mmu_context); extern long mol_trampoline; EXPORT_SYMBOL(mol_trampoline); /* For MOL */ EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ + +extern struct hash_pte *Hash; +extern unsigned long _SDR1; +EXPORT_SYMBOL_GPL(Hash); /* For KVM */ +EXPORT_SYMBOL_GPL(_SDR1); /* For KVM */ #ifdef CONFIG_SMP extern int mmu_hash_lock; EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ -- cgit v1.2.2 From 218d169c4c856eee7df56ea0fb8cbb32167e63d3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 16 Apr 2010 00:11:55 +0200 Subject: PPC: Export SWITCH_FRAME_SIZE We need the SWITCH_FRAME_SIZE define on Book3S_32 now too. So let's export it unconditionally. CC: Benjamin Herrenschmidt Signed-off-by: Alexander Graf Acked-by: Benjamin Herrenschmidt Signed-off-by: Avi Kivity --- arch/powerpc/kernel/asm-offsets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1804c2cd1dfd..2716c51906d6 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -210,8 +210,8 @@ int main(void) /* Interrupt register frame */ DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); -#ifdef CONFIG_PPC64 DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); +#ifdef CONFIG_PPC64 /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); -- cgit v1.2.2 From dd84c21748d9280f210565429b1bdb9b6353e8d2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 16 Apr 2010 00:11:57 +0200 Subject: KVM: PPC: Add KVM intercept handlers When an interrupt occurs we don't know yet if we're in guest context or in host context. When in guest context, KVM needs to handle it. So let's pull the same trick we did on Book3S_64: Just add a macro to determine if we're in guest context or not and if so jump on to KVM code. CC: Benjamin Herrenschmidt Signed-off-by: Alexander Graf Acked-by: Benjamin Herrenschmidt Signed-off-by: Avi Kivity --- arch/powerpc/kernel/head_32.S | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index e025e89fe93e..98c4b29a56f4 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -33,6 +33,7 @@ #include #include #include +#include /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ #define LOAD_BAT(n, reg, RA, RB) \ @@ -303,6 +304,7 @@ __secondary_hold_acknowledge: */ #define EXCEPTION(n, label, hdlr, xfer) \ . = n; \ + DO_KVM n; \ label: \ EXCEPTION_PROLOG; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -358,6 +360,7 @@ i##n: \ * -- paulus. */ . = 0x200 + DO_KVM 0x200 mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 mfcr r10 @@ -381,6 +384,7 @@ i##n: \ /* Data access exception. */ . = 0x300 + DO_KVM 0x300 DataAccess: EXCEPTION_PROLOG mfspr r10,SPRN_DSISR @@ -397,6 +401,7 @@ DataAccess: /* Instruction access exception. */ . = 0x400 + DO_KVM 0x400 InstructionAccess: EXCEPTION_PROLOG andis. r0,r9,0x4000 /* no pte found? */ @@ -413,6 +418,7 @@ InstructionAccess: /* Alignment exception */ . = 0x600 + DO_KVM 0x600 Alignment: EXCEPTION_PROLOG mfspr r4,SPRN_DAR @@ -427,6 +433,7 @@ Alignment: /* Floating-point unavailable */ . = 0x800 + DO_KVM 0x800 FPUnavailable: BEGIN_FTR_SECTION /* @@ -450,6 +457,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) /* System call */ . = 0xc00 + DO_KVM 0xc00 SystemCall: EXCEPTION_PROLOG EXC_XFER_EE_LITE(0xc00, DoSyscall) @@ -467,9 +475,11 @@ SystemCall: * by executing an altivec instruction. */ . = 0xf00 + DO_KVM 0xf00 b PerformanceMonitor . = 0xf20 + DO_KVM 0xf20 b AltiVecUnavailable /* @@ -882,6 +892,10 @@ __secondary_start: RFI #endif /* CONFIG_SMP */ +#ifdef CONFIG_KVM_BOOK3S_HANDLER +#include "../kvm/book3s_rmhandlers.S" +#endif + /* * Those generic dummy functions are kept for CPUs not * included in CONFIG_6xx -- cgit v1.2.2 From 251585b5d02152973dbc24c803ca322bb977d4a2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 20 Apr 2010 02:49:53 +0200 Subject: KVM: PPC: Find HTAB ourselves For KVM we need to find the location of the HTAB. We can either rely on internal data structures of the kernel or ask the hardware. Ben issued complaints about the internal data structure method, so let's switch it to our own inquiry of the HTAB. Now we're fully independend :-). CC: Benjamin Herrenschmidt Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/ppc_ksyms.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 2b7c43f95bb2..bc9f39d2598b 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -178,11 +178,6 @@ EXPORT_SYMBOL(switch_mmu_context); extern long mol_trampoline; EXPORT_SYMBOL(mol_trampoline); /* For MOL */ EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ - -extern struct hash_pte *Hash; -extern unsigned long _SDR1; -EXPORT_SYMBOL_GPL(Hash); /* For KVM */ -EXPORT_SYMBOL_GPL(_SDR1); /* For KVM */ #ifdef CONFIG_SMP extern int mmu_hash_lock; EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ -- cgit v1.2.2 From 78e2e68a2b79f394b7cd61e07987a8a89af907f7 Mon Sep 17 00:00:00 2001 From: Li Yang Date: Fri, 7 May 2010 16:38:34 +0800 Subject: powerpc/fsl-booke: Fix InstructionTLBError execute permission check In CONFIG_PTE_64BIT the PTE format has unique permission bits for user and supervisor execute. However on !CONFIG_PTE_64BIT we overload the supervisor bit to imply user execute with _PAGE_USER set. This allows us to use the same permission check mask for user or supervisor code on !CONFIG_PTE_64BIT. However, on CONFIG_PTE_64BIT we map _PAGE_EXEC to _PAGE_BAP_UX so we need a different permission mask based on the fault coming from a kernel address or user space. Without unique permission masks we see issues like the following with modules: Unable to handle kernel paging request for instruction fetch Faulting instruction address: 0xf938d040 Oops: Kernel access of bad area, sig: 11 [#1] Signed-off-by: Li Yang Signed-off-by: Jin Qing Signed-off-by: Kumar Gala --- arch/powerpc/kernel/head_fsl_booke.S | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 725526547994..edd4a57fd29e 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -639,6 +639,13 @@ interrupt_base: rlwinm r12,r12,0,16,1 mtspr SPRN_MAS1,r12 + /* Make up the required permissions for kernel code */ +#ifdef CONFIG_PTE_64BIT + li r13,_PAGE_PRESENT | _PAGE_BAP_SX + oris r13,r13,_PAGE_ACCESSED@h +#else + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC +#endif b 4f /* Get the PGD for the current thread */ @@ -646,15 +653,15 @@ interrupt_base: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) -4: - /* Make up the required permissions */ + /* Make up the required permissions for user code */ #ifdef CONFIG_PTE_64BIT - li r13,_PAGE_PRESENT | _PAGE_EXEC + li r13,_PAGE_PRESENT | _PAGE_BAP_UX oris r13,r13,_PAGE_ACCESSED@h #else li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC #endif +4: FIND_PTE andc. r13,r13,r11 /* Check permission */ -- cgit v1.2.2 From 78f622377f7d31d988db350a43c5689dd5f31876 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 13 May 2010 14:38:21 -0500 Subject: powerpc/fsl-booke: Move loadcam_entry back to asm code to fix SMP ftrace When we build with ftrace enabled its possible that loadcam_entry would have used the stack pointer (even though the code doesn't need it). We call loadcam_entry in __secondary_start before the stack is setup. To ensure that loadcam_entry doesn't use the stack pointer the easiest solution is to just have it in asm code. Signed-off-by: Kumar Gala --- arch/powerpc/kernel/asm-offsets.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 957ceb7059c5..0271b58ec31e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -448,6 +448,14 @@ int main(void) DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif +#ifdef CONFIG_FSL_BOOKE + DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); + DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0)); + DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1)); + DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2)); + DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3)); + DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7)); +#endif #ifdef CONFIG_KVM_EXIT_TIMING DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, -- cgit v1.2.2 From dcc7871128e99458ca86186b7bc8bf27ff0c47b5 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:21 -0500 Subject: kgdb: core changes to support kdb These are the minimum changes to the kgdb core in order to enable an API to connect a new front end (kdb) to the debug core. This patch introduces the dbg_kdb_mode variable controls where the user level I/O is routed. It will be routed to the gdbstub (kgdb) or to the kdb front end which is a simple shell available over the kgdboc connection. You can switch back and forth between kdb or the gdb stub mode of operation dynamically. From gdb stub mode you can blindly type "$3#33", or from the kdb mode you can enter "kgdb" to switch to the gdb stub. The logic in the debug core depends on kdb to look for the typical gdb connection sequences and return immediately with KGDB_PASS_EVENT if a gdb serial command sequence is detected. That should allow a reasonably seamless transition between kdb -> gdb without leaving the kernel exception state. The two gdb serial queries that kdb is responsible for detecting are the "?" and "qSupported" packets. CC: Ingo Molnar Signed-off-by: Jason Wessel Acked-by: Martin Hicks --- arch/powerpc/kernel/kgdb.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 41bada0298c8..c81e3de1306e 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -309,6 +309,11 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); } +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->nip = pc; +} + /* * This function does PowerPC specific procesing for interfacing to gdb. */ -- cgit v1.2.2 From ba797b28131b1f1367b662936ea370239d603cff Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:25 -0500 Subject: powerpc,kgdb: Introduce low level trap catching The only way the debugger can handle a trap in inside rcu_lock, notify_die, or atomic_notifier_call_chain without a recursive fault is to allow the kernel debugger to handle the exception first in program_check_exception(). The other change here is to make sure that kgdb_handle_exception() is called with correct parameters when catching an oops, because kdb needs to know if the entry was an oops, single step, or breakpoint exception. [benh@kernel.crashing.org: move debugger_bpt instead of #ifdef] CC: Paul Mackerras Signed-off-by: Jason Wessel Acked-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/kgdb.c | 6 ++++-- arch/powerpc/kernel/traps.c | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index c81e3de1306e..82a7b228c81a 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -115,7 +116,8 @@ void kgdb_roundup_cpus(unsigned long flags) /* KGDB functions to use existing PowerPC64 hooks. */ static int kgdb_debugger(struct pt_regs *regs) { - return kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); + return !kgdb_handle_exception(1, computeSignal(TRAP(regs)), + DIE_OOPS, regs); } static int kgdb_handle_breakpoint(struct pt_regs *regs) @@ -123,7 +125,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) if (user_mode(regs)) return 0; - if (kgdb_handle_exception(0, SIGTRAP, 0, regs) != 0) + if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0) return 0; if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 29d128eb6c43..b6859aade9c2 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -815,12 +815,15 @@ void __kprobes program_check_exception(struct pt_regs *regs) return; } if (reason & REASON_TRAP) { + /* Debugger is first in line to stop recursive faults in + * rcu_lock, notify_die, or atomic_notifier_call_chain */ + if (debugger_bpt(regs)) + return; + /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) return; - if (debugger_bpt(regs)) - return; if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { -- cgit v1.2.2 From 7358650e9e9a81c854dc4582b4193eb5ea500bf6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 May 2010 02:12:32 +0000 Subject: powerpc/rtasd: Don't start event scan if scan rate is zero There appear to be Pegasos systems which have the rtas-event-scan RTAS tokens, but on which the event scan always fails. They also have an event-scan-rate property containing 0, which means call event scan 0 times per minute. So interpret a scan rate of 0 to mean don't scan at all. This fixes the problem on the Pegasos machines and makes sense as well. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtasd.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index e907ca66f75a..638883e23e3a 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -490,6 +490,12 @@ static int __init rtas_init(void) return -ENODEV; } + if (!rtas_event_scan_rate) { + /* Broken firmware: take a rate of zero to mean don't scan */ + printk(KERN_DEBUG "rtasd: scan rate is 0, not scanning\n"); + return 0; + } + /* Make room for the sequence number */ rtas_error_log_max = rtas_get_error_log_max(); rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int); -- cgit v1.2.2 From abb17f9c3a92c5acf30e749efdf0419b7f50a5b8 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Wed, 19 May 2010 02:56:29 +0000 Subject: powerpc: Use common cpu_die (fixes SMP+SUSPEND build) Configuring a powerpc 32 bit kernel for both SMP and SUSPEND turns on CPU_HOTPLUG to enable disable_nonboot_cpus to be called by the common suspend code. Previously the definition of cpu_die for ppc32 was in the powermac platform code, causing it to be undefined if that platform as not selected. arch/powerpc/kernel/built-in.o: In function 'cpu_idle': arch/powerpc/kernel/idle.c:98: undefined reference to 'cpu_die' Move the code from setup_64 to smp.c and rename the power mac versions to their specific names. Note that this does not setup the cpu_die pointers in either smp_ops (request a given cpu die) or ppc_md (make this cpu die), for other platforms but there are generic versions in smp.c. Reported-by: Matt Sealey Reported-by: Kumar Gala Signed-off-by: Milton Miller Signed-off-by: Anton Vorontsov Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 6 ------ arch/powerpc/kernel/smp.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 914389158a9b..cea66987a6ea 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -573,12 +573,6 @@ void ppc64_boot_msg(unsigned int src, const char *msg) printk("[boot]%04x %s\n", src, msg); } -void cpu_die(void) -{ - if (ppc_md.cpu_die) - ppc_md.cpu_die(); -} - #ifdef CONFIG_SMP #define PCPU_DYN_SIZE () diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index bf366167d369..5c196d1086d9 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -648,4 +648,10 @@ void cpu_hotplug_driver_unlock() { mutex_unlock(&powerpc_cpu_hotplug_driver_mutex); } + +void cpu_die(void) +{ + if (ppc_md.cpu_die) + ppc_md.cpu_die(); +} #endif -- cgit v1.2.2 From a1263c71448aa70afb6097fdedf93c3dff5a7a15 Mon Sep 17 00:00:00 2001 From: Brian King Date: Fri, 14 May 2010 12:04:41 +0000 Subject: powerpc/vio: Switch VIO Bus PM to use generic helpers Switch to use the generic power management helpers. Signed-off-by: Brian King Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index b8e311d64ad5..9ce7b62dc3a4 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1381,29 +1381,6 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env) return 0; } -static int vio_pm_suspend(struct device *dev) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - if (pm && pm->suspend) - return pm->suspend(dev); - return 0; -} - -static int vio_pm_resume(struct device *dev) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - if (pm && pm->resume) - return pm->resume(dev); - return 0; -} - -const struct dev_pm_ops vio_dev_pm_ops = { - .suspend = vio_pm_suspend, - .resume = vio_pm_resume, -}; - static struct bus_type vio_bus_type = { .name = "vio", .dev_attrs = vio_dev_attrs, @@ -1411,7 +1388,7 @@ static struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, - .pm = &vio_dev_pm_ops, + .pm = GENERIC_SUBSYS_PM_OPS, }; /** -- cgit v1.2.2 From 5b339bdf164d8aee394609768f7e2e4415b0252a Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Mon, 10 May 2010 15:13:41 +0000 Subject: powerpc/pci: Check devices status property when scanning OF tree We ran into an issue where it looks like we're not properly ignoring a pci device with a non-good status property when we walk the device tree and instanciate the Linux side PCI devices. However, the EEH init code does look for the property and disables EEH on these devices. This leaves us in an inconsistent where we are poking at a supposedly bad piece of hardware and RTAS will block our config cycles because EEH isn't enabled anyway. Signed-of-by: Sonny Rao Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci_of_scan.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index cd11d5ca80df..6ddb795f83e8 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -310,6 +310,8 @@ static void __devinit __of_scan_bus(struct device_node *node, /* Scan direct children */ for_each_child_of_node(node, child) { pr_debug(" * %s\n", child->full_name); + if (!of_device_is_available(child)) + continue; reg = of_get_property(child, "reg", ®len); if (reg == NULL || reglen < 20) continue; -- cgit v1.2.2 From 426b6cb478e60352a463a0d1ec75c1c9fab30b13 Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Tue, 11 May 2010 05:41:08 +0000 Subject: powerpc/crashdump: Do not fail on NULL pointer dereferencing Signed-off-by: Maxim Uvarov Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 6f4613dd05ef..341d8af6f33e 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -375,6 +375,9 @@ void default_machine_crash_shutdown(struct pt_regs *regs) for_each_irq(i) { struct irq_desc *desc = irq_to_desc(i); + if (!desc || !desc->chip || !desc->chip->eoi) + continue; + if (desc->status & IRQ_INPROGRESS) desc->chip->eoi(i); -- cgit v1.2.2 From 0644079410065567e3bb31fcb8e6441f2b7685a9 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 10 May 2010 16:25:51 +0000 Subject: powerpc/kdump: CPUs assume the context of the oopsing CPU We wrap the crash_shutdown_handles[] calls with longjmp/setjmp, so if any of them fault we can recover. The problem is we add a hook to the debugger fault handler hook which calls longjmp unconditionally. This first part of kdump is run before we marshall the other CPUs, so there is a very good chance some CPU on the box is going to page fault. And when it does it hits the longjmp code and assumes the context of the oopsing CPU. The machine gets very confused when it has 10 CPUs all with the same stack, all thinking they have the same CPU id. I get even more confused trying to debug it. The patch below adds crash_shutdown_cpu and uses it to specify which cpu is in the protected region. Since it can only be -1 or the oopsing CPU, we don't need to use memory barriers since it is only valid on the local CPU - no other CPU will ever see a value that matches it's local CPU id. Eventually we should switch the order and marshall all CPUs before doing the crash_shutdown_handles[] calls, but that is a bigger fix. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 341d8af6f33e..7ced58dacb12 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -347,10 +347,12 @@ int crash_shutdown_unregister(crash_shutdown_t handler) EXPORT_SYMBOL(crash_shutdown_unregister); static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; static int handle_fault(struct pt_regs *regs) { - longjmp(crash_shutdown_buf, 1); + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); return 0; } @@ -391,6 +393,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) */ old_handler = __debugger_fault_handler; __debugger_fault_handler = handle_fault; + crash_shutdown_cpu = smp_processor_id(); for (i = 0; crash_shutdown_handles[i]; i++) { if (setjmp(crash_shutdown_buf) == 0) { /* @@ -404,6 +407,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) asm volatile("sync; isync"); } } + crash_shutdown_cpu = -1; __debugger_fault_handler = old_handler; /* -- cgit v1.2.2 From 5d7a87217de48b234b3c8ff8a73059947d822e07 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 10 May 2010 16:27:38 +0000 Subject: powerpc/kdump: Use chip->shutdown to disable IRQs I saw this in a kdump kernel: IOMMU table initialized, virtual merging enabled Interrupt 155954 (real) is invalid, disabling it. Interrupt 155953 (real) is invalid, disabling it. ie we took some spurious interrupts. default_machine_crash_shutdown tries to disable all interrupt sources but uses chip->disable which maps to the default action of: static void default_disable(unsigned int irq) { } If we use chip->shutdown, then we actually mask the IRQ: static void default_shutdown(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); desc->chip->mask(irq); desc->status |= IRQ_MASKED; } Not sure why we don't implement a ->disable action for xics.c, or why default_disable doesn't mask the interrupt. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 7ced58dacb12..cca7c8fafc1c 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -384,7 +384,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) desc->chip->eoi(i); if (!(desc->status & IRQ_DISABLED)) - desc->chip->disable(i); + desc->chip->shutdown(i); } /* -- cgit v1.2.2 From 095c7965f4dc870ed2b65143b1e2610de653416c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 10 May 2010 18:59:18 +0000 Subject: powerpc: Use more accurate limit for first segment memory allocations Author: Milton Miller On large machines we are running out of room below 256MB. In some cases we only need to ensure the allocation is in the first segment, which may be 256MB or 1TB. Add slb0_limit and use it to specify the upper limit for the irqstack and emergency stacks. On a large ppc64 box, this fixes a panic at boot when the crashkernel= option is specified (previously we would run out of memory below 256MB). Signed-off-by: Milton Miller Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index cea66987a6ea..f3fb5a79de52 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -424,9 +424,18 @@ void __init setup_system(void) DBG(" <- setup_system()\n"); } +static u64 slb0_limit(void) +{ + if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { + return 1UL << SID_SHIFT_1T; + } + return 1UL << SID_SHIFT; +} + #ifdef CONFIG_IRQSTACKS static void __init irqstack_early_init(void) { + u64 limit = slb0_limit(); unsigned int i; /* @@ -436,10 +445,10 @@ static void __init irqstack_early_init(void) for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) __va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); + THREAD_SIZE, limit)); hardirq_ctx[i] = (struct thread_info *) __va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); + THREAD_SIZE, limit)); } } #else @@ -470,7 +479,7 @@ static void __init exc_lvl_early_init(void) */ static void __init emergency_stack_init(void) { - unsigned long limit; + u64 limit; unsigned int i; /* @@ -482,7 +491,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(0x10000000ULL, lmb.rmo_size); + limit = min(slb0_limit(), lmb.rmo_size); for_each_possible_cpu(i) { unsigned long sp; -- cgit v1.2.2 From 1fc711f7ffb01089efc58042cfdbac8573d1b59a Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 13 May 2010 19:40:11 +0000 Subject: powerpc/kexec: Fix race in kexec shutdown In kexec_prepare_cpus, the primary CPU IPIs the secondary CPUs to kexec_smp_down(). kexec_smp_down() calls kexec_smp_wait() which sets the hw_cpu_id() to -1. The primary does this while leaving IRQs on which means the primary can take a timer interrupt which can lead to the IPIing one of the secondary CPUs (say, for a scheduler re-balance) but since the secondary CPU now has a hw_cpu_id = -1, we IPI CPU -1... Kaboom! We are hitting this case regularly on POWER7 machines. There is also a second race, where the primary will tear down the MMU mappings before knowing the secondaries have entered real mode. Also, the secondaries are clearing out any pending IPIs before guaranteeing that no more will be received. This changes kexec_prepare_cpus() so that we turn off IRQs in the primary CPU much earlier. It adds a paca flag to say that the secondaries have entered the kexec_smp_down() IPI and turned off IRQs, rather than overloading hw_cpu_id with -1. This new paca flag is again used to in indicate when the secondaries has entered real mode. It also ensures that all CPUs have their IRQs off before we clear out any pending IPI requests (in kexec_cpu_down()) to ensure there are no trailing IPIs left unacknowledged. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/machine_kexec_64.c | 48 ++++++++++++++++++++++++---------- arch/powerpc/kernel/misc_64.S | 8 +++--- arch/powerpc/kernel/paca.c | 2 ++ 4 files changed, 42 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0271b58ec31e..1b784ff92d9d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -184,6 +184,7 @@ int main(void) #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); + DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 040bd1de8d99..26f9900f773c 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -155,33 +155,38 @@ void kexec_copy_flush(struct kimage *image) #ifdef CONFIG_SMP -/* FIXME: we should schedule this function to be called on all cpus based - * on calling the interrupts, but we would like to call it off irq level - * so that the interrupt controller is clean. - */ +static int kexec_all_irq_disabled = 0; + static void kexec_smp_down(void *arg) { + local_irq_disable(); + mb(); /* make sure our irqs are disabled before we say they are */ + get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; + while(kexec_all_irq_disabled == 0) + cpu_relax(); + mb(); /* make sure all irqs are disabled before this */ + /* + * Now every CPU has IRQs off, we can clear out any pending + * IPIs and be sure that no more will come in after this. + */ if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 1); - local_irq_disable(); kexec_smp_wait(); /* NOTREACHED */ } -static void kexec_prepare_cpus(void) +static void kexec_prepare_cpus_wait(int wait_state) { int my_cpu, i, notified=-1; - smp_call_function(kexec_smp_down, NULL, /* wait */0); my_cpu = get_cpu(); - - /* check the others cpus are now down (via paca hw cpu id == -1) */ + /* Make sure each CPU has atleast made it to the state we need */ for (i=0; i < NR_CPUS; i++) { if (i == my_cpu) continue; - while (paca[i].hw_cpu_id != -1) { + while (paca[i].kexec_state < wait_state) { barrier(); if (!cpu_possible(i)) { printk("kexec: cpu %d hw_cpu_id %d is not" @@ -201,20 +206,35 @@ static void kexec_prepare_cpus(void) } if (i != notified) { printk( "kexec: waiting for cpu %d (physical" - " %d) to go down\n", - i, paca[i].hw_cpu_id); + " %d) to enter %i state\n", + i, paca[i].hw_cpu_id, wait_state); notified = i; } } } + mb(); +} + +static void kexec_prepare_cpus(void) +{ + + smp_call_function(kexec_smp_down, NULL, /* wait */0); + local_irq_disable(); + mb(); /* make sure IRQs are disabled before we say they are */ + get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; + + kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF); + /* we are sure every CPU has IRQs off at this point */ + kexec_all_irq_disabled = 1; /* after we tell the others to go down */ if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 0); - put_cpu(); + /* Before removing MMU mapings make sure all CPUs have entered real mode */ + kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE); - local_irq_disable(); + put_cpu(); } #else /* ! SMP */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index a5cf9c1356a6..a2b18dffa03e 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -24,6 +24,7 @@ #include #include #include +#include .text @@ -471,6 +472,10 @@ _GLOBAL(kexec_wait) 1: mflr r5 addi r5,r5,kexec_flag-1b + li r4,KEXEC_STATE_REAL_MODE + stb r4,PACAKEXECSTATE(r13) + SYNC + 99: HMT_LOW #ifdef CONFIG_KEXEC /* use no memory without kexec */ lwz r4,0(r5) @@ -494,14 +499,11 @@ kexec_flag: * note: this is a terminal routine, it does not save lr * * get phys id from paca - * set paca id to -1 to say we got here * switch to real mode * join other cpus in kexec_wait(phys_id) */ _GLOBAL(kexec_smp_wait) lhz r3,PACAHWCPUID(r13) - li r4,-1 - sth r4,PACAHWCPUID(r13) /* let others know we left */ bl real_mode b .kexec_wait diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0c40c6f476fe..f88acf0218db 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -18,6 +18,7 @@ #include #include #include +#include /* This symbol is provided by the linker - let it fill in the paca * field correctly */ @@ -97,6 +98,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->kernelbase = (unsigned long) _stext; new_paca->kernel_msr = MSR_KERNEL; new_paca->hw_cpu_id = 0xffff; + new_paca->kexec_state = KEXEC_STATE_NONE; new_paca->__current = &init_task; #ifdef CONFIG_PPC_STD_MMU_64 new_paca->slb_shadow_ptr = &slb_shadow[cpu]; -- cgit v1.2.2 From 60adec6226bbcf061d4c2d10944fced209d1847d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 13 May 2010 19:40:11 +0000 Subject: powerpc/kdump: Fix race in kdump shutdown When we are crashing, the crashing/primary CPU IPIs the secondaries to turn off IRQs, go into real mode and wait in kexec_wait. While this is happening, the primary tears down all the MMU maps. Unfortunately the primary doesn't check to make sure the secondaries have entered real mode before doing this. On PHYP machines, the secondaries can take a long time shutting down the IRQ controller as RTAS calls are need. These RTAS calls need to be serialised which resilts in the secondaries contending in lock_rtas() and hence taking a long time to shut down. We've hit this on large POWER7 machines, where some secondaries are still waiting in lock_rtas(), when the primary tears down the HPTEs. This patch makes sure all secondaries are in real mode before the primary tears down the MMU. It uses the new kexec_state entry in the paca. It times out if the secondaries don't reach real mode after 10sec. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index cca7c8fafc1c..8c066d6a8e4b 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -162,6 +162,32 @@ static void crash_kexec_prepare_cpus(int cpu) /* Leave the IPI callback set */ } +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +static void crash_kexec_wait_realmode(int cpu) +{ + unsigned int msecs; + int i; + + msecs = 10000; + for (i=0; i < NR_CPUS && msecs > 0; i++) { + if (i == cpu) + continue; + + while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i)) { + break; + } + if (!cpu_online(i)) { + break; + } + msecs--; + mdelay(1); + } + } + mb(); +} + /* * This function will be called by secondary cpus or by kexec cpu * if soft-reset is activated to stop some CPUs. @@ -419,6 +445,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crash_kexec_prepare_cpus(crashing_cpu); cpu_set(crashing_cpu, cpus_in_crash); crash_kexec_stop_spus(); + crash_kexec_wait_realmode(crashing_cpu); if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); } -- cgit v1.2.2 From dd04c63c96425af9b6741f3abf0ad25d6b1c0e8d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 16 May 2010 20:01:28 +0000 Subject: powerpc: Remove check of ibm,smt-snooze-delay OF property I'm not sure why we have code for parsing an ibm,smt-snooze-delay OF property. Since we have a smt-snooze-delay= boot option and we can also set it at runtime via sysfs, it should be safe to get rid of this code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/sysfs.c | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index e235e52dc4fe..158fb731e199 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -67,33 +67,6 @@ static ssize_t show_smt_snooze_delay(struct sys_device *dev, static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, store_smt_snooze_delay); -/* Only parse OF options if the matching cmdline option was not specified */ -static int smt_snooze_cmdline; - -static int __init smt_setup(void) -{ - struct device_node *options; - const unsigned int *val; - unsigned int cpu; - - if (!cpu_has_feature(CPU_FTR_SMT)) - return -ENODEV; - - options = of_find_node_by_path("/options"); - if (!options) - return -ENODEV; - - val = of_get_property(options, "ibm,smt-snooze-delay", NULL); - if (!smt_snooze_cmdline && val) { - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = *val; - } - - of_node_put(options); - return 0; -} -__initcall(smt_setup); - static int __init setup_smt_snooze_delay(char *str) { unsigned int cpu; @@ -102,8 +75,6 @@ static int __init setup_smt_snooze_delay(char *str) if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - smt_snooze_cmdline = 1; - if (get_option(&str, &snooze)) { for_each_possible_cpu(cpu) per_cpu(smt_snooze_delay, cpu) = snooze; -- cgit v1.2.2 From b878dc00595440586874952dd85ce9b803360b87 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 16 May 2010 20:02:39 +0000 Subject: powerpc: Use smt_snooze_delay=-1 to always busy loop Right now if we want to busy loop and not give up any time to the hypervisor we put a very large value into smt_snooze_delay. This is sometimes useful when running a single partition and you want to avoid any latencies due to the hypervisor or CPU power state transitions. While this works, it's a bit ugly - how big a number is enough now we have NO_HZ and can be idle for a very long time. The patch below makes smt_snooze_delay signed, and a negative value means loop forever: echo -1 > /sys/devices/system/cpu/cpu0/smt_snooze_delay This change shouldn't affect the existing userspace tools (eg ppc64_cpu), but I'm cc-ing Nathan just to be sure. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/sysfs.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 158fb731e199..c0d8c2006bf4 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -35,7 +35,7 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); #ifdef CONFIG_PPC64 /* Time in microseconds we delay before sleeping in the idle loop */ -DEFINE_PER_CPU(unsigned long, smt_snooze_delay) = { 100 }; +DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; static ssize_t store_smt_snooze_delay(struct sys_device *dev, struct sysdev_attribute *attr, @@ -44,9 +44,9 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, { struct cpu *cpu = container_of(dev, struct cpu, sysdev); ssize_t ret; - unsigned long snooze; + long snooze; - ret = sscanf(buf, "%lu", &snooze); + ret = sscanf(buf, "%ld", &snooze); if (ret != 1) return -EINVAL; @@ -61,7 +61,7 @@ static ssize_t show_smt_snooze_delay(struct sys_device *dev, { struct cpu *cpu = container_of(dev, struct cpu, sysdev); - return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); + return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); } static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, @@ -70,15 +70,14 @@ static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, static int __init setup_smt_snooze_delay(char *str) { unsigned int cpu; - int snooze; + long snooze; if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - if (get_option(&str, &snooze)) { - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - } + snooze = simple_strtol(str, NULL, 10); + for_each_possible_cpu(cpu) + per_cpu(smt_snooze_delay, cpu) = snooze; return 1; } -- cgit v1.2.2 From 99ec28f183daa450faa7bdad6f932364ae325648 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 9 May 2010 17:39:05 +0000 Subject: powerpc: Remove unused 'protect4gb' boot parameter 'protect4gb' boot parameter was introduced to avoid allocating dma space acrossing 4GB boundary in 2007 (the commit 569975591c5530fdc9c7a3c45122e5e46f075a74). In 2008, the IOMMU was fixed to use the boundary_mask parameter per device properly. So 'protect4gb' workaround was removed (the 383af9525bb27f927511874f6306247ec13f1c28). But somehow I messed the 'protect4gb' boot parameter that was used to enable the workaround. Signed-off-by: FUJITA Tomonori Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/iommu.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ec94f906ea43..d5839179ec77 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -43,20 +43,9 @@ #define DBG(...) static int novmerge; -static int protect4gb = 1; static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); -static int __init setup_protect4gb(char *str) -{ - if (strcmp(str, "on") == 0) - protect4gb = 1; - else if (strcmp(str, "off") == 0) - protect4gb = 0; - - return 1; -} - static int __init setup_iommu(char *str) { if (!strcmp(str, "novmerge")) @@ -66,7 +55,6 @@ static int __init setup_iommu(char *str) return 1; } -__setup("protect4gb=", setup_protect4gb); __setup("iommu=", setup_iommu); static unsigned long iommu_range_alloc(struct device *dev, -- cgit v1.2.2