From 9c8b39077b0a5b0990bb707fcd7b5913d7b322b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Nov 2011 19:56:05 -0500 Subject: powerpc: Fix build breakage in jump_label.c Should do what other architectures do and wrap all that code into the appropriate ifdef Signed-off-by: Al Viro Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/jump_label.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 368d158d665d..a1ed8a8c7cb4 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -11,6 +11,7 @@ #include #include +#ifdef HAVE_JUMP_LABEL void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { @@ -21,3 +22,4 @@ void arch_jump_label_transform(struct jump_entry *entry, else patch_instruction(addr, PPC_INST_NOP); } +#endif -- cgit v1.2.2 From bbc24a25e29136a56cf5015ef23eb2c2e8828023 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Mon, 14 Nov 2011 16:43:50 +0000 Subject: powerpc/4xx: Fix typos in kexec config dependencies Kexec is not supported on 47x. 47x is a variant of 44x with slightly different MMU and SMP support. There was a typo in the config dependency for kexec. This patch fixes the same. Signed-off-by: Suzuki K. Poulose Signed-off-by: Paul Bolle Cc: Kumar Gala Cc: Josh Boyer Cc: linux ppc dev Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/misc_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index f7d760ab5ca1..7cd07b42ca1a 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -738,7 +738,7 @@ relocate_new_kernel: mr r5, r31 li r0, 0 -#elif defined(CONFIG_44x) && !defined(CONFIG_47x) +#elif defined(CONFIG_44x) && !defined(CONFIG_PPC_47x) /* * Code for setting up 1:1 mapping for PPC440x for KEXEC -- cgit v1.2.2 From 6d1e2c6c1a0b800473db4df8595c95745be548ea Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 14 Nov 2011 12:55:47 +0000 Subject: powerpc: panic if we can't instantiate RTAS I had to debug a strange situation where all manner of things were failing. SMT threads, storage and network were all completely broken. The root cause was we couldn't find enough memory to instantiate RTAS - this was a network install so the initrd was huge. Instead of limping along and failing in mysterious ways we should just panic up front if RTAS exists and we can't allocate space for it. 
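To make the change concrete before the diff below: alloc_down() and prom_panic() are existing prom_init.c helpers, and prom_panic() does not return, so the allocate-or-panic pattern stops the boot immediately instead of letting it continue without RTAS. A minimal sketch of the resulting logic:

	base = alloc_down(size, PAGE_SIZE, 0);
	if (base == 0)
		prom_panic("Could not allocate memory for RTAS\n");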
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index b4fa66127495..cc584865b3df 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1579,10 +1579,8 @@ static void __init prom_instantiate_rtas(void) return; base = alloc_down(size, PAGE_SIZE, 0); - if (base == 0) { - prom_printf("RTAS allocation failed !\n"); - return; - } + if (base == 0) + prom_panic("Could not allocate memory for RTAS\n"); rtas_inst = call_prom("open", 1, 1, ADDR("/rtas")); if (!IHANDLE_VALID(rtas_inst)) { -- cgit v1.2.2 From d715e433b7ad19c02fc4becf0d5e9a59f97925de Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 14 Nov 2011 12:54:47 +0000 Subject: powerpc: Copy down exception vectors after feature fixups kdump fails because we try to execute an HV only instruction. Feature fixups are being applied after we copy the exception vectors down to 0 so they miss out on any updates. We have always had this issue but it only became critical in v3.0 when we added CFAR support (breaks POWER5) and v3.1 when we added POWERNV (breaks everyone). Signed-off-by: Anton Blanchard Cc: [v3.0+] Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/kvm.c | 1 - arch/powerpc/kernel/setup_32.c | 2 ++ arch/powerpc/kernel/setup_64.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 35f27646c4ff..2985338d0e10 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -132,7 +132,6 @@ static void kvm_patch_ins_b(u32 *inst, int addr) /* On relocatable kernels interrupts handlers and our code can be in different regions, so we don't patch them */ - extern u32 __end_interrupts; if ((ulong)inst < (ulong)&__end_interrupts) return; #endif diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index c1ce86357ecb..ac7610815113 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -107,6 +107,8 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) PTRRELOC(&__start___lwsync_fixup), PTRRELOC(&__stop___lwsync_fixup)); + do_final_fixups(); + return KERNELBASE + offset; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1a9dea80a69b..fb9bb46e7e88 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -359,6 +359,7 @@ void __init setup_system(void) &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); do_lwsync_fixups(cur_cpu_spec->cpu_features, &__start___lwsync_fixup, &__stop___lwsync_fixup); + do_final_fixups(); /* * Unflatten the device-tree passed by prom_init or kexec -- cgit v1.2.2 From 2cd76629f6c40f7b227ba7b3885ce10e6ec0face Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 10 Nov 2011 16:04:17 +0000 Subject: powerpc/trace: Add a dummy stack frame for trace_hardirqs_off The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. If an exception occurs in user mode, there is only one stack frame on the stack and accessing CALLER_ADDR1 will cause the following call trace. So we create a dummy stack frame to make trace_hardirqs_off happy.
WARNING: at kernel/smp.c:459 Modules linked in: NIP: c0093280 LR: c00930a0 CTR: c0010780 REGS: edb87ae0 TRAP: 0700 Not tainted (3.1.0) MSR: 00021002 CR: 28002888 XER: 00000000 TASK = edce2ac0[17658] 'mthread-lock-on' THREAD: edb86000 CPU: 5 GPR00: 00000001 edb87b90 edce2ac0 00000005 c0019594 edb87bd8 00000001 00000fe3 GPR08: 00041000 c084138c 4e20120d edb87b90 48002888 1001aa7c 00000000 00000000 GPR16: 48830000 10012a8c 00000000 10000af4 00000001 c0810000 00000000 00000000 GPR24: ee9aa920 c0816a18 00000000 00000005 c0019594 edb87bd8 ee20178c edb87b90 NIP [c0093280] smp_call_function_many+0x214/0x2b4 LR [c00930a0] smp_call_function_many+0x34/0x2b4 Call Trace: [edb87b90] [c00930a0] smp_call_function_many+0x34/0x2b4 (unreliable) [edb87bd0] [c00194ec] __flush_tlb_page+0xac/0x100 [edb87c00] [c001957c] flush_tlb_page+0x3c/0x54 [edb87c10] [c00180ac] ptep_set_access_flags+0x74/0x12c [edb87c40] [c0128068] handle_pte_fault+0x2f0/0x9ac [edb87cb0] [c0128c3c] handle_mm_fault+0x104/0x1dc [edb87ce0] [c05f40f4] do_page_fault+0x2dc/0x630 [edb87e50] [c001078c] handle_page_fault+0xc/0x80 Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/entry_32.S | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 56212bc0ab08..4f80cf1ce77b 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -215,7 +215,22 @@ reenable_mmu: /* re-enable mmu so we can */ stw r9,8(r1) stw r11,12(r1) stw r3,ORIG_GPR3(r1) + /* + * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. + * If from user mode there is only one stack frame on the stack, and + * accessing CALLER_ADDR1 will cause an oops. So we need to create a dummy + * stack frame to make trace_hardirqs_off happy. + */ + andi. r12,r12,MSR_PR + beq 11f + stwu r1,-16(r1) + bl trace_hardirqs_off + addi r1,r1,16 + b 12f + +11: bl trace_hardirqs_off +12: lwz r0,GPR0(r1) lwz r3,ORIG_GPR3(r1) lwz r4,GPR4(r1) -- cgit v1.2.2 From ba28c9aae26ef7f3651eef6835fae30a979f88ba Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 6 Oct 2011 02:53:38 +0000 Subject: powerpc: Revert show_regs() define for readability We had an existing ifdef for 4xx & BOOKE processors that got changed to CONFIG_PPC_ADV_DEBUG_REGS. The define has nothing to do with CONFIG_PPC_ADV_DEBUG_REGS.
The define really should be: #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) and not #ifdef CONFIG_PPC_ADV_DEBUG_REGS Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9054ca9ab4f9..ad3612af0a04 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -657,7 +657,7 @@ void show_regs(struct pt_regs * regs) if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) printk("CFAR: "REG"\n", regs->orig_gpr3); if (trap == 0x300 || trap == 0x600) -#ifdef CONFIG_PPC_ADV_DEBUG_REGS +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); #else printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); -- cgit v1.2.2 From b95bc2191412f5ecf2781c966110a13fa82a80d3 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 6 Oct 2011 02:53:39 +0000 Subject: powerpc: Remove extraneous CONFIG_PPC_ADV_DEBUG_REGS define All of DebugException is already protected by CONFIG_PPC_ADV_DEBUG_REGS; there is no need to have another such ifdef inside the function. Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/traps.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 4e5908264d1a..5459d148a0f6 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1298,14 +1298,12 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) if (user_mode(regs)) { current->thread.dbcr0 &= ~DBCR0_IC; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1)) regs->msr |= MSR_DE; else /* Make sure the IDM bit is off */ current->thread.dbcr0 &= ~DBCR0_IDM; -#endif } _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); -- cgit v1.2.2 From 187b9f2aa769198daa7cf8054abb65a08b8d8b47 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 6 Oct 2011 02:53:40 +0000 Subject: powerpc/book3e-64: Fix debug support for userspace With the introduction of CONFIG_PPC_ADV_DEBUG_REGS user space debug is broken on Book-E 64-bit parts that support delayed debug events. When switch_booke_debug_regs() sets DBCR0 we'll start getting debug events as MSR_DE is also set and we aren't able to handle debug events from kernel space. We can remove the hack that always enables MSR_DE and loads up DBCR0 and just utilize switch_booke_debug_regs() to get user space debug working again. We still need to handle critical/debug exception stacks & proper save/restore of state for those exception levels to support debug events from kernel space like we have on 32-bit. Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ad3612af0a04..6457574c0b2f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -486,28 +486,6 @@ struct task_struct *__switch_to(struct task_struct *prev, new_thread = &new->thread; old_thread = &current->thread; -#if defined(CONFIG_PPC_BOOK3E_64) - /* XXX Current Book3E code doesn't deal with kernel side DBCR0, * we always hold the user values, so we set it now.
- * - * However, we ensure the kernel MSR:DE is appropriately cleared too - * to avoid spurrious single step exceptions in the kernel. - * - * This will have to change to merge with the ppc32 code at some point, - * but I don't like much what ppc32 is doing today so there's some - * thinking needed there - */ - if ((new_thread->dbcr0 | old_thread->dbcr0) & DBCR0_IDM) { - u32 dbcr0; - - mtmsr(mfmsr() & ~MSR_DE); - isync(); - dbcr0 = mfspr(SPRN_DBCR0); - dbcr0 = (dbcr0 & DBCR0_EDM) | new_thread->dbcr0; - mtspr(SPRN_DBCR0, dbcr0); - } -#endif /* CONFIG_PPC64_BOOK3E */ - #ifdef CONFIG_PPC64 /* * Collect processor utilization data per process -- cgit v1.2.2 From a313f4c55d4952f2105fe33a4957ed858e998359 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Nov 2011 04:51:19 +0000 Subject: powerpc/signal32: Fix sigset_t conversion when copying to user On PPC64, put_sigset_t converts a sigset_t to a compat_sigset_t before copying it to userspace. There is a typo in the case that we have 4 words to copy, meaning that we corrupt the compat_sigset_t. It appears that _NSIG_WORDS can't be greater than 2 at the moment so this code is probably always optimised away anyway. Signed-off-by: Will Deacon Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/signal_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 78b76dc54dfb..836a5a19eb2c 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -97,7 +97,7 @@ static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) compat_sigset_t cset; switch (_NSIG_WORDS) { - case 4: cset.sig[5] = set->sig[3] & 0xffffffffull; + case 4: cset.sig[6] = set->sig[3] & 0xffffffffull; cset.sig[7] = set->sig[3] >> 32; case 3: cset.sig[4] = set->sig[2] & 0xffffffffull; cset.sig[5] = set->sig[2] >> 32; -- cgit v1.2.2 From 05737c7c5bca9a4f3e0f8bb9476445971b64fafd Mon Sep 17 00:00:00 2001 From: Jason Jin Date: Fri, 28 Oct 2011 16:08:00 +0800 Subject: powerpc/fsl-pci: Don't hide resource for pci/e when configured as Agent/EP The current pci/pcie init code will hide the pci/pcie host resource, but it does not check whether the controller is host/RC or agent/EP. If configured as agent/EP, we should avoid hiding its resource on the host side. In a PCI system, the Programming Interface can be used to judge the host/agent status: Programming Interface = 0: host Programming Interface = 1: Agent In a PCIe system, both the Programming Interface and Header Type can be used to judge the RC/EP status.
Header Type = 0: EP Header Type = 1: RC Signed-off-by: Jason Jin Signed-off-by: Mingkai Hu Signed-off-by: Jia Hongtao Signed-off-by: Li Yang Signed-off-by: Kumar Gala --- arch/powerpc/kernel/pci-common.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 458ed3bee663..069aa75e7161 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1747,10 +1747,13 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) static void fixup_hide_host_resource_fsl(struct pci_dev *dev) { int i, class = dev->class >> 8; + /* When configured as agent, programming interface = 1 */ + int prog_if = dev->class & 0xf; if ((class == PCI_CLASS_PROCESSOR_POWERPC || class == PCI_CLASS_BRIDGE_OTHER) && (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) && + (prog_if == 0) && (dev->bus->parent == NULL)) { for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { dev->resource[i].start = 0; -- cgit v1.2.2 From 37fb9a0231ee43d42d069863bdfd567fca2b61af Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:17 +0000 Subject: powerpc/time: Handle wrapping of decrementer When re-enabling interrupts we have code to handle edge-sensitive decrementers by resetting the decrementer to 1 whenever it is negative. If interrupts were disabled long enough that the decrementer wrapped to positive we do nothing. This means interrupts can be delayed for a long time until it finally goes negative again. While we hope interrupts are never disabled long enough for the decrementer to go positive, we have a very good test team that can drive any kernel into the ground. The softlockup data we get back from these failures could be seconds in the future, completely missing the cause of the lockup. We already keep track of the timebase of the next event so use that to work out if we should trigger a decrementer exception. Signed-off-by: Anton Blanchard Cc: stable@kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 15 ++++++--------- arch/powerpc/kernel/time.c | 9 +++++++++ 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5c3c46948d94..745c1e7c10fd 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -164,16 +164,13 @@ notrace void arch_local_irq_restore(unsigned long en) */ local_paca->hard_enabled = en; -#ifndef CONFIG_BOOKE - /* On server, re-trigger the decrementer if it went negative since - * some processors only trigger on edge transitions of the sign bit. - * - * BookE has a level sensitive decrementer (latches in TSR) so we - * don't need that + /* + * Trigger the decrementer if we have a pending event. Some processors + * only trigger on edge transitions of the sign bit. We might also + * have disabled interrupts long enough that the decrementer wrapped + * to positive. */ - if ((int)mfspr(SPRN_DEC) < 0) - mtspr(SPRN_DEC, 1); -#endif /* CONFIG_BOOKE */ + decrementer_check_overflow(); /* * Force the delivery of pending soft-disabled interrupts on PS3.
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 522bb1dfc353..5db163c96751 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -889,6 +889,15 @@ static void __init clocksource_init(void) clock->name, clock->mult, clock->shift); } +void decrementer_check_overflow(void) +{ + u64 now = get_tb_or_rtc(); + struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); + + if (now >= decrementer->next_tb) + set_dec(1); +} + static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { -- cgit v1.2.2 From d8afc6fd95496204174f19af0cb39eefee0c3e8a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:18 +0000 Subject: powerpc/time: Use clockevents_calc_mult_shift We can use clockevents_calc_mult_shift instead of doing all the work ourselves. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 5db163c96751..fae3094c2a9a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -112,8 +112,6 @@ static void decrementer_set_mode(enum clock_event_mode mode, static struct clock_event_device decrementer_clockevent = { .name = "decrementer", .rating = 200, - .shift = 0, /* To be filled in */ - .mult = 0, /* To be filled in */ .irq = 0, .set_next_event = decrementer_set_next_event, .set_mode = decrementer_set_mode, @@ -913,31 +911,6 @@ static void decrementer_set_mode(enum clock_event_mode mode, decrementer_set_next_event(DECREMENTER_MAX, dev); } -static inline uint64_t div_sc64(unsigned long ticks, unsigned long nsec, - int shift) -{ - uint64_t tmp = ((uint64_t)ticks) << shift; - - do_div(tmp, nsec); - return tmp; -} - -static void __init setup_clockevent_multiplier(unsigned long hz) -{ - u64 mult, shift = 32; - - while (1) { - mult = div_sc64(hz, NSEC_PER_SEC, shift); - if (mult && (mult >> 32UL) == 0UL) - break; - - shift--; - } - - decrementer_clockevent.shift = shift; - decrementer_clockevent.mult = mult; -} - static void register_decrementer_clockevent(int cpu) { struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; @@ -955,7 +928,8 @@ static void __init init_decrementer_clockevent(void) { int cpu = smp_processor_id(); - setup_clockevent_multiplier(ppc_tb_freq); + clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4); + decrementer_clockevent.max_delta_ns = clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); decrementer_clockevent.min_delta_ns = -- cgit v1.2.2 From 11b8633ada8633991e584951d0027f2741162201 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:19 +0000 Subject: powerpc/time: Use clocksource_register_hz Use clocksource_register_hz which calculates the shift/mult factors for us. Also remove the shift = 22 assumption in update_vsyscall - thanks to Paul Mackerras and John Stultz for catching that.
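As background for the two conversions above and below: both clockevents_calc_mult_shift() and clocksource_register_hz() boil down to choosing a (mult, shift) pair such that ns = (ticks * mult) >> shift, with the largest shift for which mult still fits in 32 bits. A standalone sketch of that idea follows; the 512 MHz timebase frequency is only an assumed example value, and the real kernel helpers additionally clamp the conversion range, which this sketch ignores:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t hz = 512000000;	/* assumed timebase frequency */
		uint64_t ticks = 1000;		/* ticks to convert */
		uint32_t shift, mult = 0;

		/* Pick the largest shift for which mult still fits in 32 bits */
		for (shift = 32; shift > 0; shift--) {
			uint64_t m = ((uint64_t)1000000000 << shift) / hz;
			if (m <= 0xffffffffULL) {
				mult = (uint32_t)m;
				break;
			}
		}

		/* ns = (ticks * mult) >> shift */
		printf("shift=%u mult=%u: %llu ticks -> %llu ns\n", shift, mult,
		       (unsigned long long)ticks,
		       (unsigned long long)((ticks * mult) >> shift));
		return 0;
	}

For 512 MHz this prints shift=31 mult=4194304000, and 1000 ticks -> 1953 ns (the exact value is 1953.125 ns), which also shows the rounding inherent in the fixed-point conversion.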
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fae3094c2a9a..d204b726a185 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -86,8 +86,6 @@ static struct clocksource clocksource_rtc = { .rating = 400, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, - .mult = 0, /* To be filled in */ .read = rtc_read, }; @@ -97,8 +95,6 @@ static struct clocksource clocksource_timebase = { .rating = 400, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, - .mult = 0, /* To be filled in */ .read = timebase_read, }; @@ -822,9 +818,8 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, ++vdso_data->tb_update_count; smp_mb(); - /* XXX this assumes clock->shift == 22 */ - /* 4611686018 ~= 2^(20+64-22) / 1e9 */ - new_tb_to_xs = (u64) mult * 4611686018ULL; + /* 19342813113834067 ~= 2^(20+64) / 1e9 */ + new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift); new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC; do_div(new_stamp_xsec, 1000000000); new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; @@ -875,9 +870,7 @@ static void __init clocksource_init(void) else clock = &clocksource_timebase; - clock->mult = clocksource_hz2mult(tb_ticks_per_sec, clock->shift); - - if (clocksource_register(clock)) { + if (clocksource_register_hz(clock, tb_ticks_per_sec)) { printk(KERN_ERR "clocksource: %s is already registered\n", clock->name); return; -- cgit v1.2.2 From 68568add2ca70153cca3dd1858eaa0776821cf75 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:20 +0000 Subject: powerpc/time: Remove unnecessary sanity check of decrementer expiration The clockevents code uses max_delta_ns to avoid calling a clockevent with too large a value. Remove the redundant version of this in the timer_interrupt code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index d204b726a185..2eaaa242c2e6 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -572,7 +572,6 @@ void timer_interrupt(struct pt_regs * regs) struct pt_regs *old_regs; struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); struct clock_event_device *evt = &decrementer->event; - u64 now; /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. 
@@ -607,16 +606,9 @@ void timer_interrupt(struct pt_regs * regs) get_lppaca()->int_dword.fields.decr_int = 0; #endif - now = get_tb_or_rtc(); - if (now >= decrementer->next_tb) { - decrementer->next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); - } else { - now = decrementer->next_tb - now; - if (now <= DECREMENTER_MAX) - set_dec((int)now); - } + decrementer->next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending()) -- cgit v1.2.2 From 621692cb7efb6d0e38c62e41844a6360c6719b20 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:21 +0000 Subject: powerpc/time: Fix some style issues Fix some formatting issues and use the DECREMENTER_MAX define instead of 0x7fffffff. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 2eaaa242c2e6..b1990b987e2c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -106,12 +106,12 @@ static void decrementer_set_mode(enum clock_event_mode mode, struct clock_event_device *dev); static struct clock_event_device decrementer_clockevent = { - .name = "decrementer", - .rating = 200, - .irq = 0, - .set_next_event = decrementer_set_next_event, - .set_mode = decrementer_set_mode, - .features = CLOCK_EVT_FEAT_ONESHOT, + .name = "decrementer", + .rating = 200, + .irq = 0, + .set_next_event = decrementer_set_next_event, + .set_mode = decrementer_set_mode, + .features = CLOCK_EVT_FEAT_ONESHOT, }; struct decrementer_clock { @@ -435,7 +435,7 @@ EXPORT_SYMBOL(profile_pc); /* * This function recalibrates the timebase based on the 49-bit time-of-day * value in the Titan chip. The Titan is much more accurate than the value - * returned by the service processor for the timebase frequency. + * returned by the service processor for the timebase frequency. */ static int __init iSeries_tb_recal(void) @@ -636,9 +636,9 @@ static void generic_suspend_disable_irqs(void) * with suspending. */ - set_dec(0x7fffffff); + set_dec(DECREMENTER_MAX); local_irq_disable(); - set_dec(0x7fffffff); + set_dec(DECREMENTER_MAX); } static void generic_suspend_enable_irqs(void) @@ -982,10 +982,10 @@ void __init time_init(void) boot_tb = get_tb_or_rtc(); /* If platform provided a timezone (pmac), we correct the time */ - if (timezone_offset) { + if (timezone_offset) { sys_tz.tz_minuteswest = -timezone_offset / 60; sys_tz.tz_dsttime = 0; - } + } vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; -- cgit v1.2.2 From 7df1027542c9353bef4d027cb4ab8e99f69017b7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Nov 2011 20:07:22 +0000 Subject: powerpc/time: Optimise decrementer_check_overflow decrementer_check_overflow is called from arch_local_irq_restore so we want to make it as light weight as possible. As such, turn decrementer_check_overflow into an inline function. To avoid a circular mess of includes, separate out the two components of struct decrementer_clock and keep the struct clock_event_device part local to time.c. The fast path improves from: arch_local_irq_restore 0: mflr r0 4: std r0,16(r1) 8: stdu r1,-112(r1) c: stb r3,578(r13) 10: cmpdi cr7,r3,0 14: beq- cr7,24 <.arch_local_irq_restore+0x24> ... 
24: addi r1,r1,112 28: ld r0,16(r1) 2c: mtlr r0 30: blr to: arch_local_irq_restore 0: std r30,-16(r1) 4: ld r30,0(r2) 8: stb r3,578(r13) c: cmpdi cr7,r3,0 10: beq- cr7,6c <.arch_local_irq_restore+0x6c> ... 6c: ld r30,-16(r1) 70: blr Unfortunately we still setup a local TOC (due to -mminimal-toc). Yet another sign we should be moving to -mcmodel=medium. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 9 +++++++++ arch/powerpc/kernel/time.c | 27 +++++++-------------------- 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 745c1e7c10fd..2ff4f5e59620 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -115,6 +115,15 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } +static inline notrace void decrementer_check_overflow(void) +{ + u64 now = get_tb_or_rtc(); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + + if (now >= *next_tb) + set_dec(1); +} + notrace void arch_local_irq_restore(unsigned long en) { /* diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index b1990b987e2c..9754743db8b9 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -114,12 +114,8 @@ static struct clock_event_device decrementer_clockevent = { .features = CLOCK_EVT_FEAT_ONESHOT, }; -struct decrementer_clock { - struct clock_event_device event; - u64 next_tb; -}; - -static DEFINE_PER_CPU(struct decrementer_clock, decrementers); +DEFINE_PER_CPU(u64, decrementers_next_tb); +static DEFINE_PER_CPU(struct clock_event_device, decrementers); #ifdef CONFIG_PPC_ISERIES static unsigned long __initdata iSeries_recal_titan; @@ -570,8 +566,8 @@ void arch_irq_work_raise(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); - struct clock_event_device *evt = &decrementer->event; + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + struct clock_event_device *evt = &__get_cpu_var(decrementers); /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. 
@@ -606,7 +602,7 @@ void timer_interrupt(struct pt_regs * regs) get_lppaca()->int_dword.fields.decr_int = 0; #endif - decrementer->next_tb = ~(u64)0; + *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); @@ -872,19 +868,10 @@ static void __init clocksource_init(void) clock->name, clock->mult, clock->shift); } -void decrementer_check_overflow(void) -{ - u64 now = get_tb_or_rtc(); - struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); - - if (now >= decrementer->next_tb) - set_dec(1); -} - static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __get_cpu_var(decrementers).next_tb = get_tb_or_rtc() + evt; + __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; set_dec(evt); return 0; } @@ -898,7 +885,7 @@ static void decrementer_set_mode(enum clock_event_mode mode, static void register_decrementer_clockevent(int cpu) { - struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; + struct clock_event_device *dec = &per_cpu(decrementers, cpu); *dec = decrementer_clockevent; dec->cpumask = cpumask_of(cpu); -- cgit v1.2.2 From fac26ad4f9cb794c9d1032f55f40a31cb55be09a Mon Sep 17 00:00:00 2001 From: Jimi Xenidis Date: Thu, 29 Sep 2011 10:55:13 +0000 Subject: powerpc/book3e: Add ICSWX/ACOP support to Book3e cores like A2 ICSWX is also used by the A2 processor to access coprocessors, although not all "chips" that contain A2s have coprocessors. Signed-off-by: Jimi Xenidis Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cpu_setup_a2.S | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S index 7f818feaa7a5..ebc62f42a237 100644 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -41,11 +41,16 @@ _GLOBAL(__setup_cpu_a2) * core local but doing it always won't hurt */ -#ifdef CONFIG_PPC_WSP_COPRO +#ifdef CONFIG_PPC_ICSWX /* Make sure ACOP starts out as zero */ li r3,0 mtspr SPRN_ACOP,r3 + /* Skip the following if we are in Guest mode */ + mfmsr r3 + andis. r0,r3,MSR_GS@h + bne _icswx_skip_guest + /* Enable icswx instruction */ mfspr r3,SPRN_A2_CCR2 ori r3,r3,A2_CCR2_ENABLE_ICSWX @@ -54,7 +59,8 @@ _GLOBAL(__setup_cpu_a2) /* Unmask all CTs in HACOP */ li r3,-1 mtspr SPRN_HACOP,r3 -#endif /* CONFIG_PPC_WSP_COPRO */ +_icswx_skip_guest: +#endif /* CONFIG_PPC_ICSWX */ /* Enable doorbell */ mfspr r3,SPRN_A2_CCR2 -- cgit v1.2.2 From df17f56d8a1a3a533b6b3e3a49a624626a49b197 Mon Sep 17 00:00:00 2001 From: "Ravi K. Nittala" Date: Mon, 3 Oct 2011 21:49:53 +0000 Subject: powerpc/pseries: Cancel RTAS event scan before firmware flash The RTAS firmware flash update is conducted using an RTAS call that is serialized by lock_rtas(), which uses a spin_lock. While the flash is in progress, rtasd keeps scanning for RTAS events generated by the system. This is performed via a workqueue mechanism. The rtas_event_scan() also uses an RTAS call to scan the events, eventually trying to acquire the spin_lock before issuing the request. The flash update takes a while to complete, and during this time any other RTAS call has to wait. In this case, rtas_event_scan() waits for a long time on the spin_lock resulting in a soft lockup.
Fix: Just before the flash update is performed, the queued rtas_event_scan() work item is cancelled from the work queue so that there is no other RTAS call issued while the flash is in progress. After the flash completes, the system reboots and the rtas_event_scan() is rescheduled. Signed-off-by: Suzuki Poulose Signed-off-by: Ravi Nittala Reported-by: Divya Vikas Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtas_flash.c | 6 ++++++ arch/powerpc/kernel/rtasd.c | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index e037c7494fd8..4174b4b23246 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -567,6 +567,12 @@ static void rtas_flash_firmware(int reboot_type) return; } + /* + * Just before starting the firmware flash, cancel the event scan work + * to avoid any soft lockup issues. + */ + rtas_cancel_event_scan(); + /* * NOTE: the "first" block must be under 4GB, so we create * an entry with no data blocks in the reserved buffer in diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 481ef064c8f1..1045ff49cc6d 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -472,6 +472,13 @@ static void start_event_scan(void) &event_scan_work, event_scan_delay); } +/* Cancel the rtas event scan work */ +void rtas_cancel_event_scan(void) +{ + cancel_delayed_work_sync(&event_scan_work); +} +EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); + static int __init rtas_init(void) { struct proc_dir_entry *entry; -- cgit v1.2.2 From 3b5e16d7ad0cf4b03a0650092a6fdcb27b536a82 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 Oct 2011 02:30:50 +0000 Subject: powerpc: Mark IPI interrupts IRQF_NO_THREAD IPI handlers cannot be threaded. Remove the obsolete IRQF_DISABLED flag (see commit e58aa3d2) while at it. Signed-off-by: Thomas Gleixner Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6df70907d60a..f0abe92f63f2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -187,7 +187,8 @@ int smp_request_message_ipi(int virq, int msg) return 1; } #endif - err = request_irq(virq, smp_ipi_action[msg], IRQF_PERCPU, + err = request_irq(virq, smp_ipi_action[msg], + IRQF_PERCPU | IRQF_NO_THREAD, smp_ipi_name[msg], 0); WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n", virq, smp_ipi_name[msg], err); -- cgit v1.2.2 From 491b98c315dbe39b20bd4a24a6179c42349f42c0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 6 Nov 2011 18:55:57 +0000 Subject: powerpc/pci: Add a platform hook after probe and before resource survey Some platforms need to perform resource allocation using a custom algorithm due to HW constraints, or may want to tweak things globally below a host bridge. For example OPAL support for IODA will need to perform a resource allocation pass that applies IODA specific segmentation constraints to MMIO which cannot be done simply using the kernel generic resource management code. 
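To illustrate how a platform might consume the new hook described above, here is a hypothetical sketch; ppc_md.pcibios_fixup_phb and its call site in pcibios_scan_phb() come from the patch below, while the platform name and the fixup body are invented for illustration:

	/* Hypothetical platform code; only the .pcibios_fixup_phb hook is from the patch */
	static void __init example_pcibios_fixup_phb(struct pci_controller *hose)
	{
		/* Called from pcibios_scan_phb() after the child bus scan and
		 * before the generic resource survey, so platform-specific
		 * constraints (e.g. IODA segmentation) can be applied to this
		 * PHB here.
		 */
	}

	define_machine(example) {
		.name			= "example",
		.pcibios_fixup_phb	= example_pcibios_fixup_phb,
		/* ... other machdep callbacks ... */
	};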
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 458ed3bee663..f5b753de3718 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1732,6 +1732,12 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) if (mode == PCI_PROBE_NORMAL) hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); + /* Platform gets a chance to do some global fixups before + * we proceed to resource allocation + */ + if (ppc_md.pcibios_fixup_phb) + ppc_md.pcibios_fixup_phb(hose); + /* Configure PCI Express settings */ if (bus && !pci_has_flag(PCI_PROBE_ONLY)) { struct pci_bus *child; -- cgit v1.2.2 From 48c2ce97fa406607ca5e11a76cf507d171452dd9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 6 Nov 2011 18:55:58 +0000 Subject: powerpc/pci: Change how re-assigning resources works When PCI_REASSIGN_ALL_RSRC is set, we used to clear all bus resources at the beginning of survey and re-allocate them later. This changes it so instead, during early fixup, we mark all resources as IORESOURCE_UNSET and move them down to be 0-based. Later, if bus resources are still unset at the beginning of the survey, then we clear them. This shouldn't impact the re-assignment case on 4xx, but will enable us to have the platform do some custom resource assignment before the survey, by clearing individual resources' IORESOURCE_UNSET bit. Also limits the clutter in the kernel log from fixup when re-assigning since we don't care about the offset applied to the BAR values in this case. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 66 ++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 30 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index f5b753de3718..8f428fff545f 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -921,18 +921,22 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev) struct resource *res = dev->resource + i; if (!res->flags) continue; + + /* If we're going to re-assign everything, we mark all resources + * as unset (and 0-base them).
In addition, we mark BARs starting + * at 0 as unset as well, except if PCI_PROBE_ONLY is also set + * since in that case, we don't want to re-assign anything */ - if (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY)) { - pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] is unassigned\n", - pci_name(dev), i, - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned int)res->flags); + if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) || + (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { + /* Only print message if not re-assigning */ + if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) + pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] " + "is unassigned\n", + pci_name(dev), i, + (unsigned long long)res->start, + (unsigned long long)res->end, + (unsigned int)res->flags); res->end -= res->start; res->start = 0; res->flags |= IORESOURCE_UNSET; @@ -1042,6 +1046,16 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus) if (i >= 3 && bus->self->transparent) continue; + /* If we are going to re-assign everything, mark the resource + * as unset and move it down to 0 + */ + if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { + res->flags |= IORESOURCE_UNSET; + res->end -= res->start; + res->start = 0; + continue; + } + pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n", pci_name(dev), i, (unsigned long long)res->start,\ @@ -1262,18 +1276,15 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) pci_bus_for_each_resource(bus, res, i) { if (!res || !res->flags || res->start > res->end || res->parent) continue; + + /* If the resource was left unset at this point, we clear it */ + if (res->flags & IORESOURCE_UNSET) + goto clear_resource; + if (bus->parent == NULL) pr = (res->flags & IORESOURCE_IO) ? &ioport_resource : &iomem_resource; else { - /* Don't bother with non-root busses when - * re-assigning all resources. We clear the - * resource flags as if they were colliding - * and as such ensure proper re-allocation - * later. - */ - if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) - goto clear_resource; pr = pci_find_parent_resource(bus->self, res); if (pr == res) { /* this happens when the generic PCI @@ -1304,9 +1315,9 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) if (reparent_resources(pr, res) == 0) continue; } - printk(KERN_WARNING "PCI: Cannot allocate resource region " - "%d of PCI bridge %d, will remap\n", i, bus->number); -clear_resource: + pr_warning("PCI: Cannot allocate resource region " + "%d of PCI bridge %d, will remap\n", i, bus->number); + clear_resource: res->start = res->end = 0; res->flags = 0; } @@ -1451,16 +1462,11 @@ void __init pcibios_resource_survey(void) { struct pci_bus *b; - /* Allocate and assign resources. If we re-assign everything, then - * we skip the allocate phase - */ + /* Allocate and assign resources */ list_for_each_entry(b, &pci_root_buses, node) pcibios_allocate_bus_resources(b); - - if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); - } + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); /* Before we start assigning unassigned resource, we try to reserve * the low IO area and the VGA memory area if they intersect the -- cgit v1.2.2 From 184cd4a3b962a4769889615430eaf40076b97969 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 15 Nov 2011 17:29:08 +0000 Subject: powerpc/powernv: PCI support for p7IOC under OPAL v2 This adds support for p7IOC (and possibly other IODA v1 IO Hubs) using OPAL v2 interfaces. 
We completely take over resource assignment and assign them using an algorithm that hands out device BARs in a way that makes them fit in individual segments of the M32 window of the bridge, which enables us to assign individual PEs to devices and functions. The current implementation gives out a PE per function on PCIe, and a PE for the entire bridge for PCIe to PCI-X bridges. This can be adjusted / fine tuned later. We also set up DMA resources (32-bit only for now) and MSIs (both 32-bit and 64-bit MSI are supported). The DMA allocation tries to divide the available 256M segments of the 32-bit DMA address space "fairly" among PEs. This is done using a "weight" heuristic which assigns less value to things like OHCI USB controllers than, for example, SCSI RAID controllers. This algorithm will probably want some fine tuning for specific devices or device types. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci_dn.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 4e69deb89b37..dd9e4a04bf79 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -50,6 +50,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data) dn->data = pdn; pdn->node = dn; pdn->phb = phb; +#ifdef CONFIG_PPC_POWERNV + pdn->pe_number = IODA_INVALID_PE; +#endif regs = of_get_property(dn, "reg", NULL); if (regs) { /* First register entry is addr (00BBSS00) */ -- cgit v1.2.2 From 595fe91447b03cb72c97f45bc5db30fd73b66b38 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Thu, 10 Nov 2011 19:58:53 +0000 Subject: powerpc: Export PIR data through sysfs On Fri, Nov 11, 2011 at 10:17:55AM +0530, Ananth N Mavinakayanahalli wrote: > > > > At this rate we're going to end up with no bits left for CPU features > > way too quickly... Especially for something we only care about once at > > boot time. > > > > Wouldn't CPU_FTR_PPCAS_ARCH_V2 be a good enough test ? > > /me checks Cell manuals... yes, that test would be good enough. I will > cook up a patch to use this. Here it is...
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/sysfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ce035c1905f0..f579be552094 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -177,11 +177,13 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); SYSFS_PMCSETUP(purr, SPRN_PURR); SYSFS_PMCSETUP(spurr, SPRN_SPURR); SYSFS_PMCSETUP(dscr, SPRN_DSCR); +SYSFS_PMCSETUP(pir, SPRN_PIR); static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL); static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr); static SYSDEV_ATTR(purr, 0600, show_purr, store_purr); +static SYSDEV_ATTR(pir, 0400, show_pir, NULL); unsigned long dscr_default = 0; EXPORT_SYMBOL(dscr_default); @@ -392,6 +394,9 @@ static void __cpuinit register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_DSCR)) sysdev_create_file(s, &attr_dscr); + + if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) + sysdev_create_file(s, &attr_pir); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_online(cpu); @@ -462,6 +467,9 @@ static void unregister_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_DSCR)) sysdev_remove_file(s, &attr_dscr); + + if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) + sysdev_remove_file(s, &attr_pir); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_offline(cpu); -- cgit v1.2.2 From 3bfd0c9c8f9cd2c09cf3e5376c7113eec3370ebd Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 24 Nov 2011 19:35:57 +0000 Subject: powerpc: Decode correct MSR bits in oops output On a 64bit book3s machine I have an oops from a system reset that claims the book3e CE bit was set: MSR: 8000000000021032 CR: 24004082 XER: 00000010 On a book3s machine system reset sets IBM bit 46 and 47 depending on the power saving mode. Separate the definitions by type and for completeness add the rest of the bits in. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6457574c0b2f..ebe5766781aa 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -584,16 +584,32 @@ static struct regbit { unsigned long bit; const char *name; } msr_bits[] = { +#if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE) + {MSR_SF, "SF"}, + {MSR_HV, "HV"}, +#endif + {MSR_VEC, "VEC"}, + {MSR_VSX, "VSX"}, +#ifdef CONFIG_BOOKE + {MSR_CE, "CE"}, +#endif {MSR_EE, "EE"}, {MSR_PR, "PR"}, {MSR_FP, "FP"}, - {MSR_VEC, "VEC"}, - {MSR_VSX, "VSX"}, {MSR_ME, "ME"}, - {MSR_CE, "CE"}, +#ifdef CONFIG_BOOKE {MSR_DE, "DE"}, +#else + {MSR_SE, "SE"}, + {MSR_BE, "BE"}, +#endif {MSR_IR, "IR"}, {MSR_DR, "DR"}, + {MSR_PMM, "PMM"}, +#ifndef CONFIG_BOOKE + {MSR_RI, "RI"}, + {MSR_LE, "LE"}, +#endif {0, NULL} }; -- cgit v1.2.2 From d5b9ee7b514ee2f3df649fe38d01494ad7a8b956 Mon Sep 17 00:00:00 2001 From: Tanmay Inamdar Date: Mon, 28 Nov 2011 21:01:41 +0000 Subject: powerpc/40x: Add APM8018X SOC support The AppliedMicro APM8018X embedded processor targets embedded applications that require low power and a small footprint. It features a PowerPC 405 processor core built in a 65nm low-power CMOS process with a five-stage pipeline executing up to one instruction per cycle. The family has 128-kbytes of on-chip memory, a 128-bit local bus and on-chip DDR2 SDRAM controller with 16-bit interface. 
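For context on how the table entry below gets used: identify_cpu() walks cpu_specs[] and selects the first entry whose masked PVR matches, with the all-zero default entry matching anything. A small standalone sketch of that matching logic, using invented table values rather than the real ones:

	#include <stdio.h>
	#include <stdint.h>

	struct cpu_spec {
		uint32_t pvr_mask;
		uint32_t pvr_value;
		const char *cpu_name;
	};

	/* Invented entries for illustration; first match wins, as in cputable.c */
	static const struct cpu_spec specs[] = {
		{ 0xffff0000, 0x7ff10000, "example-405-variant" },
		{ 0x00000000, 0x00000000, "default" },
	};

	static const struct cpu_spec *identify(uint32_t pvr)
	{
		unsigned int i;

		for (i = 0; i < sizeof(specs) / sizeof(specs[0]); i++)
			if ((pvr & specs[i].pvr_mask) == specs[i].pvr_value)
				return &specs[i];
		return NULL;
	}

	int main(void)
	{
		/* 0x7ff11432 & 0xffff0000 == 0x7ff10000 -> first entry matches */
		printf("%s\n", identify(0x7ff11432)->cpu_name);
		return 0;
	}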
Signed-off-by: Tanmay Inamdar Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cputable.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index edae5bb06f1f..ce59693835c7 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1505,6 +1505,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_4xx, .platform = "ppc405", }, + { /* APM8018X */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff11432, + .cpu_name = "APM8018X", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, -- cgit v1.2.2 From a6146888be0aa80ea41c99178d7d2e08efc776b5 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Mon, 10 Oct 2011 10:50:43 +0000 Subject: powerpc: Add gpages reservation code for 64-bit FSL BOOKE For 64-bit FSL_BOOKE implementations, gigantic pages need to be reserved at boot time by the memblock code based on the command line. This adds the call that handles the reservation, and fixes some code comments. It also removes the previous pr_err when reserve_hugetlb_gpages is called on a system without hugetlb enabled - the way the code is structured, the call is unconditional and the resulting error message spurious and confusing. Signed-off-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fb9bb46e7e88..4cb8f1e9d044 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -35,6 +35,8 @@ #include #include #include +#include + #include #include #include @@ -64,6 +66,7 @@ #include #include #include +#include #include "setup.h" @@ -217,6 +220,13 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + /* + * Reserve any gigantic pages requested on the command line. + * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + DBG(" <- early_setup()\n"); } -- cgit v1.2.2 From 40dfef66a9e3d4a308c3ed7355c9a89e68c08ffc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 29 Nov 2011 18:22:56 +0000 Subject: powerpc/powernv: Workaround OFW issues in prom_init.c Open Firmware on OPAL machines seems to have issues if we close stdin and/or we try to print things after calling "quiesce" so we avoid doing both. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index cc584865b3df..f4f9f2f14322 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2969,9 +2969,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* * in case stdin is USB and still active on IBM machines... * Unfortunately quiesce crashes on some powermacs if we have - * closed stdin already (in particular the powerbook 101). 
+ * closed stdin already (in particular the powerbook 101). It + * appears that the OPAL version of OFW doesn't like it either. */ - if (RELOC(of_platform) != PLATFORM_POWERMAC) + if (RELOC(of_platform) != PLATFORM_POWERMAC && + RELOC(of_platform) != PLATFORM_OPAL) prom_close_stdin(); /* @@ -2987,8 +2989,12 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * is common to us and kexec */ hdr = RELOC(dt_header_start); - prom_printf("returning from prom_init\n"); - prom_debug("->dt_header_start=0x%x\n", hdr); + + /* Don't print anything after quiesce under OPAL, it crashes OFW */ + if (RELOC(of_platform) != PLATFORM_OPAL) { + prom_printf("returning from prom_init\n"); + prom_debug("->dt_header_start=0x%x\n", hdr); + } #ifdef CONFIG_PPC32 reloc_got2(-offset); -- cgit v1.2.2 From 4666ca2aa344105da0da3afda48d987c82261c05 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 29 Nov 2011 20:16:25 +0000 Subject: powerpc/pci: Make pci_read_irq_line() static It's only used inside the same file where it's defined. There's also no point exporting it anymore. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 9bffc028f45a..fa4a573d6716 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -214,7 +214,7 @@ char __devinit *pcibios_setup(char *str) * If the interrupt is used, then gets the interrupt line from the * openfirmware and sets it in the pci_dev and pci_config line. */ -int pci_read_irq_line(struct pci_dev *pci_dev) +static int pci_read_irq_line(struct pci_dev *pci_dev) { struct of_irq oirq; unsigned int virq; @@ -283,7 +283,6 @@ int pci_read_irq_line(struct pci_dev *pci_dev) return 0; } -EXPORT_SYMBOL(pci_read_irq_line); /* * Platform support for /proc/bus/pci/X/Y mmap()s, -- cgit v1.2.2 From 771dae81896855d25f7f8746aaf56c0238deafb6 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 30 Nov 2011 02:46:31 +0000 Subject: powerpc/cpuidle: Add cpu_idle_wait() to allow switching of idle routines This patch provides cpu_idle_wait() routine for the powerpc platform which is required by the cpuidle subsystem. This routine is required to change the idle handler on SMP systems. The equivalent routine for x86 is in arch/x86/kernel/process.c but the powerpc implementation is different. cpuidle_disable variable is to enable/disable cpuidle framework if power_save option is set during the boot time. Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Signed-off-by: Arun R Bharadwaj Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/idle.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 39a2baa6ad58..8574b0e81ff1 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -39,9 +39,13 @@ #define cpu_should_die() 0 #endif +unsigned long cpuidle_disable = IDLE_NO_OVERRIDE; +EXPORT_SYMBOL(cpuidle_disable); + static int __init powersave_off(char *arg) { ppc_md.power_save = NULL; + cpuidle_disable = IDLE_POWERSAVE_OFF; return 0; } __setup("powersave=off", powersave_off); @@ -102,6 +106,29 @@ void cpu_idle(void) } } + +/* + * cpu_idle_wait - Used to ensure that all the CPUs come out of the old + * idle loop and start using the new idle loop. 
+ * Required while changing idle handler on SMP systems. + * Caller must have changed idle handler to the new value before the call. + * This window may be larger on shared systems. + */ +void cpu_idle_wait(void) +{ + int cpu; + smp_mb(); + + /* kick all the CPUs so that they exit out of old idle routine */ + get_online_cpus(); + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + smp_send_reschedule(cpu); + } + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + int powersave_nap; #ifdef CONFIG_SYSCTL -- cgit v1.2.2 From 707827f3387d9b260d50fa697885a4042cea3bf4 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 30 Nov 2011 02:46:42 +0000 Subject: powerpc/cpuidle: cpuidle driver for pSeries This patch implements a back-end cpuidle driver for pSeries based on pseries_dedicated_idle_loop and pseries_shared_idle_loop routines. The driver is built only if CONFIG_CPU_IDLE is set. This cpuidle driver uses global registration of idle states and not per-cpu. Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Signed-off-by: Arun R Bharadwaj Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index f579be552094..6fdf5ffe8c44 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "cacheinfo.h" @@ -51,6 +52,7 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, return -EINVAL; per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; + update_smt_snooze_delay(snooze); return count; } -- cgit v1.2.2 From 58154c8ce71a7854d969d73468fd00e5eeeab708 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:09 +0000 Subject: powerpc: Give us time to get all oopses out before panicking I've been seeing truncated output when people send system reset info to me. We should see a backtrace for every CPU, but the panic() code takes the box down before they all make it out to the console. The panic code runs unlocked so we also see corrupted console output. If we are going to panic, then delay 1 second before calling into the panic code. Move oops_exit inside the die lock and put a newline between oopses for clarity. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/traps.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5459d148a0f6..91377870b26a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -158,6 +158,8 @@ int die(const char *str, struct pt_regs *regs, long err) bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); + oops_exit(); + printk("\n"); raw_spin_unlock_irqrestore(&die.lock, flags); if (kexec_should_crash(current) || @@ -165,13 +167,23 @@ int die(const char *str, struct pt_regs *regs, long err) crash_kexec(regs); crash_kexec_secondary(regs); + /* + * While our oops output is serialised by a spinlock, output + * from panic() called below can race and corrupt it. If we + * know we are going to panic, delay for 1 second so we have a + * chance to get clean backtraces from all CPUs that are oopsing. 
+ */ + if (in_interrupt() || panic_on_oops || !current->pid || + is_global_init(current)) { + mdelay(MSEC_PER_SEC); + } + if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - oops_exit(); do_exit(err); return 0; -- cgit v1.2.2 From 9b00ac06978c54788f13eefd34a07b77db48d567 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:10 +0000 Subject: powerpc: Remove broken and complicated kdump system reset code We have a lot of complicated logic that handles possible recursion between kdump and a system reset exception. We can solve this in a much simpler way using the same setjmp/longjmp tricks xmon does. As a first step, this patch removes the old system reset code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 87 +++++++-------------------------------------- arch/powerpc/kernel/traps.c | 33 +++++++---------- 2 files changed, 25 insertions(+), 95 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index d879809d5c45..3d87b205d5f5 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -47,7 +47,6 @@ /* This keeps a track of which one is crashing cpu. */ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; -cpumask_t cpus_in_sr = CPU_MASK_NONE; #define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ @@ -55,7 +54,6 @@ static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); #ifdef CONFIG_SMP -static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); void crash_ipi_callback(struct pt_regs *regs) { @@ -69,24 +67,9 @@ void crash_ipi_callback(struct pt_regs *regs) crash_save_cpu(regs, cpu); cpumask_set_cpu(cpu, &cpus_in_crash); - /* - * Entered via soft-reset - could be the kdump - * process is invoked using soft-reset or user activated - * it if some CPU did not respond to an IPI. - * For soft-reset, the secondary CPU can enter this func - * twice. 1 - using IPI, and 2. soft-reset. - * Tell the kexec CPU that entered via soft-reset and ready - * to go down. - */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) { - cpumask_clear_cpu(cpu, &cpus_in_sr); - atomic_inc(&enter_on_soft_reset); - } - /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. - * If not, soft-reset will be invoked to bring other CPUs. */ while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) cpu_relax(); @@ -103,25 +86,14 @@ void crash_ipi_callback(struct pt_regs *regs) /* NOTREACHED */ } -/* - * Wait until all CPUs are entered via soft-reset. - */ -static void crash_soft_reset_check(int cpu) -{ - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - - cpumask_clear_cpu(cpu, &cpus_in_sr); - while (atomic_read(&enter_on_soft_reset) != ncpus) - cpu_relax(); -} - - static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + printk(KERN_EMERG "Sending IPI to other CPUs\n"); + crash_send_ipi(crash_ipi_callback); smp_wmb(); @@ -131,7 +103,6 @@ static void crash_kexec_prepare_cpus(int cpu) * respond. * Delay of at least 10 seconds. 
*/ - printk(KERN_EMERG "Sending IPI to other cpus...\n"); msecs = 10000; while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { cpu_relax(); @@ -140,69 +111,36 @@ static void crash_kexec_prepare_cpus(int cpu) /* Would it be better to replace the trap vector here? */ - /* - * FIXME: In case if we do not get all CPUs, one possibility: ask the - * user to do soft reset such that we get all. - * Soft-reset will be used until better mechanism is implemented. - */ if (cpumask_weight(&cpus_in_crash) < ncpus) { - printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", + printk(KERN_EMERG "ERROR: %d CPU(s) not responding\n", ncpus - cpumask_weight(&cpus_in_crash)); - printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); - cpumask_clear(&cpus_in_sr); - atomic_set(&enter_on_soft_reset, 0); - while (cpumask_weight(&cpus_in_crash) < ncpus) - cpu_relax(); } - /* - * Make sure all CPUs are entered via soft-reset if the kdump is - * invoked using soft-reset. - */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) - crash_soft_reset_check(cpu); - /* Leave the IPI callback set */ + + printk(KERN_EMERG "IPI complete\n"); } /* - * This function will be called by secondary cpus or by kexec cpu - * if soft-reset is activated to stop some CPUs. + * This function will be called by secondary cpus. */ void crash_kexec_secondary(struct pt_regs *regs) { - int cpu = smp_processor_id(); unsigned long flags; - int msecs = 5; + int msecs = 500; local_irq_save(flags); - /* Wait 5ms if the kexec CPU is not entered yet. */ + + /* Wait 500ms for the primary crash CPU to signal its progress */ while (crashing_cpu < 0) { if (--msecs < 0) { - /* - * Either kdump image is not loaded or - * kdump process is not started - Probably xmon - * exited using 'x'(exit and recover) or - * kexec_should_crash() failed for all running tasks. - */ - cpumask_clear_cpu(cpu, &cpus_in_sr); + /* No response, kdump image may not have been loaded */ local_irq_restore(flags); return; } + mdelay(1); cpu_relax(); } - if (cpu == crashing_cpu) { - /* - * Panic CPU will enter this func only via soft-reset. - * Wait until all secondary CPUs entered and - * then start kexec boot. - */ - crash_soft_reset_check(cpu); - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 0); - machine_kexec(kexec_crash_image); - /* NOTREACHED */ - } + crash_ipi_callback(regs); } @@ -225,7 +163,6 @@ static void crash_kexec_prepare_cpus(int cpu) void crash_kexec_secondary(struct pt_regs *regs) { - cpumask_clear(&cpus_in_sr); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 91377870b26a..014f88f03d3f 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -162,10 +162,20 @@ int die(const char *str, struct pt_regs *regs, long err) printk("\n"); raw_spin_unlock_irqrestore(&die.lock, flags); - if (kexec_should_crash(current) || - kexec_sr_activated(smp_processor_id())) + /* + * A system reset (0x100) is a request to dump, so we always send + * it through the crashdump code. + */ + if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) { crash_kexec(regs); - crash_kexec_secondary(regs); + + /* + * We aren't the primary crash CPU. We need to send it + * to a holding pattern to avoid it ending up in the panic + * code. 
+ */ + crash_kexec_secondary(regs); + } /* * While our oops output is serialised by a spinlock, output @@ -232,25 +242,8 @@ void system_reset_exception(struct pt_regs *regs) return; } -#ifdef CONFIG_KEXEC - cpumask_set_cpu(smp_processor_id(), &cpus_in_sr); -#endif - die("System Reset", regs, SIGABRT); - /* - * Some CPUs when released from the debugger will execute this path. - * These CPUs entered the debugger via a soft-reset. If the CPU was - * hung before entering the debugger it will return to the hung - * state when exiting this function. This causes a problem in - * kdump since the hung CPU(s) will not respond to the IPI sent - * from kdump. To prevent the problem we call crash_kexec_secondary() - * here. If a kdump had not been initiated or we exit the debugger - * with the "exit and recover" command (x) crash_kexec_secondary() - * will return after 5ms and the CPU returns to its previous state. - */ - crash_kexec_secondary(regs); - /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable System Reset"); -- cgit v1.2.2 From 07fe0c6132578186773e01ffb0f63ded222effe7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:11 +0000 Subject: powerpc/kdump: Use setjmp/longjmp to handle kdump and system reset recursion We can handle recursion caused by system reset by reusing the crash shutdown fault handler. Since we don't have an OS triggerable NMI, if all CPUs don't make it into kdump then we tell the user to issue a system reset. However if we have a panic timeout set we cannot wait forever and must continue the kdump. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 72 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 3d87b205d5f5..a8b6e2d705a4 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -53,6 +53,16 @@ static cpumask_t cpus_in_crash = CPU_MASK_NONE; static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); +static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; + +static int handle_fault(struct pt_regs *regs) +{ + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); + return 0; +} + #ifdef CONFIG_SMP void crash_ipi_callback(struct pt_regs *regs) @@ -89,14 +99,16 @@ void crash_ipi_callback(struct pt_regs *regs) static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + int tries = 0; + int (*old_handler)(struct pt_regs *regs); printk(KERN_EMERG "Sending IPI to other CPUs\n"); crash_send_ipi(crash_ipi_callback); smp_wmb(); +again: /* * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to @@ -111,12 +123,52 @@ static void crash_kexec_prepare_cpus(int cpu) /* Would it be better to replace the trap vector here? 
*/ - if (cpumask_weight(&cpus_in_crash) < ncpus) { - printk(KERN_EMERG "ERROR: %d CPU(s) not responding\n", - ncpus - cpumask_weight(&cpus_in_crash)); + if (cpumask_weight(&cpus_in_crash) >= ncpus) { + printk(KERN_EMERG "IPI complete\n"); + return; + } + + printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", + ncpus - cpumask_weight(&cpus_in_crash)); + + /* + * If we have a panic timeout set then we can't wait indefinitely + * for someone to activate system reset. We also give up on the + * second time through if system reset fail to work. + */ + if ((panic_timeout > 0) || (tries > 0)) + return; + + /* + * A system reset will cause all CPUs to take an 0x100 exception. + * The primary CPU returns here via setjmp, and the secondary + * CPUs reexecute the crash_kexec_secondary path. + */ + old_handler = __debugger; + __debugger = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + + if (setjmp(crash_shutdown_buf) == 0) { + printk(KERN_EMERG "Activate system reset (dumprestart) " + "to stop other cpu(s)\n"); + + /* + * A system reset will force all CPUs to execute the + * crash code again. We need to reset cpus_in_crash so we + * wait for everyone to do this. + */ + cpus_in_crash = CPU_MASK_NONE; + smp_mb(); + + while (cpumask_weight(&cpus_in_crash) < ncpus) + cpu_relax(); } - printk(KERN_EMERG "IPI complete\n"); + crash_shutdown_cpu = -1; + __debugger = old_handler; + + tries++; + goto again; } /* @@ -245,16 +297,6 @@ int crash_shutdown_unregister(crash_shutdown_t handler) } EXPORT_SYMBOL(crash_shutdown_unregister); -static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; -static int crash_shutdown_cpu = -1; - -static int handle_fault(struct pt_regs *regs) -{ - if (crash_shutdown_cpu == smp_processor_id()) - longjmp(crash_shutdown_buf, 1); - return 0; -} - void default_machine_crash_shutdown(struct pt_regs *regs) { unsigned int i; -- cgit v1.2.2 From 8c27474a252e84e8a71ae4a43e18f0193a08e3f7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:12 +0000 Subject: powerpc: Cleanup crash/kexec code Remove some unnecessary defines and fix some spelling mistakes. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index a8b6e2d705a4..d4467557b00e 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -10,41 +10,27 @@ * */ -#undef DEBUG - #include #include #include #include -#include #include #include #include -#include -#include #include #include #include -#include #include #include #include #include #include -#include #include #include #include -#ifdef DEBUG -#include -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -/* This keeps a track of which one is crashing cpu. */ +/* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; @@ -201,7 +187,7 @@ void crash_kexec_secondary(struct pt_regs *regs) static void crash_kexec_prepare_cpus(int cpu) { /* - * move the secondarys to us so that we can copy + * move the secondaries to us so that we can copy * the new kernel 0-0x100 safely * * do this if kexec in setup.c ? 
@@ -302,7 +288,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs) unsigned int i; int (*old_handler)(struct pt_regs *regs); - /* * This function is only called after the system * has panicked or is otherwise in a critical state. @@ -328,7 +313,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) machine_kexec_mask_interrupts(); /* - * Call registered shutdown routines savely. Swap out + * Call registered shutdown routines safely. Swap out * __debugger_fault_handler, and replace on exit. */ old_handler = __debugger_fault_handler; -- cgit v1.2.2 From 760ca4dc90e624eb8f7ff85a5925151e25577758 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:13 +0000 Subject: powerpc: Rework die() Our die() code was based off a very old x86 version. Update it to mirror the current x86 code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/traps.c | 128 +++++++++++++++++++++++++------------------- 1 file changed, 73 insertions(+), 55 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 014f88f03d3f..c091527efd89 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -98,18 +98,14 @@ static void pmac_backlight_unblank(void) static inline void pmac_backlight_unblank(void) { } #endif -int die(const char *str, struct pt_regs *regs, long err) +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; +static int die_counter; + +static unsigned __kprobes long oops_begin(struct pt_regs *regs) { - static struct { - raw_spinlock_t lock; - u32 lock_owner; - int lock_owner_depth; - } die = { - .lock = __RAW_SPIN_LOCK_UNLOCKED(die.lock), - .lock_owner = -1, - .lock_owner_depth = 0 - }; - static int die_counter; + int cpu; unsigned long flags; if (debugger(regs)) @@ -117,50 +113,37 @@ int die(const char *str, struct pt_regs *regs, long err) oops_enter(); - if (die.lock_owner != raw_smp_processor_id()) { - console_verbose(); - raw_spin_lock_irqsave(&die.lock, flags); - die.lock_owner = smp_processor_id(); - die.lock_owner_depth = 0; - bust_spinlocks(1); - if (machine_is(powermac)) - pmac_backlight_unblank(); - } else { - local_save_flags(flags); - } - - if (++die.lock_owner_depth < 3) { - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); -#endif - printk("%s\n", ppc_md.name ? ppc_md.name : ""); - - if (notify_die(DIE_OOPS, str, regs, err, 255, - SIGSEGV) == NOTIFY_STOP) - return 1; - - print_modules(); - show_regs(regs); - } else { - printk("Recursive die() failure, output suppressed\n"); + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. 
should stop eventually */; + else + arch_spin_lock(&die_lock); } + die_nest_count++; + die_owner = cpu; + console_verbose(); + bust_spinlocks(1); + if (machine_is(powermac)) + pmac_backlight_unblank(); + return flags; +} +static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, + int signr) +{ bust_spinlocks(0); - die.lock_owner = -1; + die_owner = -1; add_taint(TAINT_DIE); + die_nest_count--; oops_exit(); printk("\n"); - raw_spin_unlock_irqrestore(&die.lock, flags); + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); /* * A system reset (0x100) is a request to dump, so we always send @@ -177,6 +160,9 @@ int die(const char *str, struct pt_regs *regs, long err) crash_kexec_secondary(regs); } + if (!signr) + return; + /* * While our oops output is serialised by a spinlock, output * from panic() called below can race and corrupt it. If we @@ -190,15 +176,46 @@ int die(const char *str, struct pt_regs *regs, long err) if (in_interrupt()) panic("Fatal exception in interrupt"); - if (panic_on_oops) panic("Fatal exception"); + do_exit(signr); +} - do_exit(err); +static int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); +#endif + printk("%s\n", ppc_md.name ? ppc_md.name : ""); + + if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) + return 1; + + print_modules(); + show_regs(regs); return 0; } +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(regs); + + if (__die(str, regs, err)) + err = 0; + oops_end(flags, regs, err); +} + void user_single_step_siginfo(struct task_struct *tsk, struct pt_regs *regs, siginfo_t *info) { @@ -217,10 +234,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) "at %016lx nip %016lx lr %016lx code %x\n"; if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } else if (show_unhandled_signals && - unhandled_signal(current, signr)) { + die("Exception in kernel mode", regs, signr); + return; + } + + if (show_unhandled_signals && unhandled_signal(current, signr)) { printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, signr, addr, regs->nip, regs->link, code); -- cgit v1.2.2 From 549e88a134b3b393a4312e8d76628b9260eee57f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:16 +0000 Subject: powerpc/kdump: Delay before sending IPI on a system reset If we enter the kdump code via system reset, wait a bit before sending the IPI to capture all secondary CPUs. Without it we race with the hypervisor that is issuing the system reset to each CPU. If the IPI gets there first the system reset oops output then shows the register state of the IPI handler which is not what we want. I took the opportunity to add defines for all the various delays we have. There's no need for cpu_relax when we are doing an mdelay, so remove them too. 
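A rough sketch of the pattern this change describes (a named delay for the system-reset entry path, and mdelay()-based polling with no cpu_relax()) follows; the helper name and its arguments are illustrative, while the real constants and call sites appear in the diff below.

#include <linux/delay.h>
#include <linux/cpumask.h>

#define PRIMARY_TIMEOUT	500	/* ms: let secondaries enter on their own first */
#define IPI_TIMEOUT	10000	/* ms: how long to wait after sending the IPI */

/* Illustrative helper: poll until ncpus CPUs have checked in, or time out. */
static void wait_for_secondaries(const cpumask_t *in_crash, int ncpus)
{
	int msecs = IPI_TIMEOUT;

	/* mdelay() already burns a millisecond per pass; cpu_relax() adds nothing */
	while (cpumask_weight(in_crash) < ncpus && --msecs > 0)
		mdelay(1);
}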
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index d4467557b00e..b942980e9650 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -30,6 +30,20 @@ #include #include +/* + * The primary CPU waits a while for all secondary CPUs to enter. This is to + * avoid sending an IPI if the secondary CPUs are entering + * crash_kexec_secondary on their own (eg via a system reset). + * + * The secondary timeout has to be longer than the primary. Both timeouts are + * in milliseconds. + */ +#define PRIMARY_TIMEOUT 500 +#define SECONDARY_TIMEOUT 1000 + +#define IPI_TIMEOUT 10000 +#define REAL_MODE_TIMEOUT 10000 + /* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; @@ -99,11 +113,9 @@ again: * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to * respond. - * Delay of at least 10 seconds. */ - msecs = 10000; + msecs = IPI_TIMEOUT; while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { - cpu_relax(); mdelay(1); } @@ -163,11 +175,11 @@ again: void crash_kexec_secondary(struct pt_regs *regs) { unsigned long flags; - int msecs = 500; + int msecs = SECONDARY_TIMEOUT; local_irq_save(flags); - /* Wait 500ms for the primary crash CPU to signal its progress */ + /* Wait for the primary crash CPU to signal its progress */ while (crashing_cpu < 0) { if (--msecs < 0) { /* No response, kdump image may not have been loaded */ @@ -176,7 +188,6 @@ void crash_kexec_secondary(struct pt_regs *regs) } mdelay(1); - cpu_relax(); } crash_ipi_callback(regs); @@ -211,7 +222,7 @@ static void crash_kexec_wait_realmode(int cpu) unsigned int msecs; int i; - msecs = 10000; + msecs = REAL_MODE_TIMEOUT; for (i=0; i < nr_cpu_ids && msecs > 0; i++) { if (i == cpu) continue; @@ -306,6 +317,14 @@ void default_machine_crash_shutdown(struct pt_regs *regs) */ crashing_cpu = smp_processor_id(); crash_save_cpu(regs, crashing_cpu); + + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(regs) == 0x100) + mdelay(PRIMARY_TIMEOUT); + crash_kexec_prepare_cpus(crashing_cpu); cpumask_set_cpu(crashing_cpu, &cpus_in_crash); crash_kexec_wait_realmode(crashing_cpu); -- cgit v1.2.2
From 2440c01e10f07adcbc2094ba12ae4ad6094bd2b6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:17 +0000 Subject: powerpc/kdump: Only save CPU state first time through the secondary CPU capture code We might enter the secondary CPU capture code twice, e.g. if we have to unstick some CPUs with a system reset. In this case we don't want to overwrite the state on CPUs that had made it into the capture code OK, so use the cpus_state_saved cpumask for that and make it local to crash_ipi_callback. To control progress, use atomic_t cpus_in_crash to count how many CPUs have made it into the kdump code, and time_to_dump to tell everyone it's time to dump.
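A minimal sketch of the save-once rendezvous described above, reusing the names introduced by this patch; capture_secondary() is an illustrative stand-in for crash_ipi_callback():

#include <linux/kexec.h>
#include <linux/atomic.h>
#include <linux/cpumask.h>
#include <asm/ptrace.h>
#include <asm/processor.h>

static cpumask_t cpus_state_saved;	/* CPUs that already saved their registers */
static atomic_t cpus_in_crash;		/* arrivals, counting repeat passes too */
static int time_to_dump;		/* set by the crashing CPU when all are in */

/* May run twice on one CPU: once via IPI, once again via system reset. */
static void capture_secondary(struct pt_regs *regs, int cpu)
{
	/* Save register state only on the first pass through here */
	if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
		crash_save_cpu(regs, cpu);
		cpumask_set_cpu(cpu, &cpus_state_saved);
	}

	/* Count every arrival so the primary can tell when everyone is in */
	atomic_inc(&cpus_in_crash);

	/* Hold the CPU here until the crashing CPU starts the dump */
	while (!time_to_dump)
		cpu_relax();
}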
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index b942980e9650..28be3452e67a 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -46,7 +46,8 @@ /* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; -static cpumask_t cpus_in_crash = CPU_MASK_NONE; +static atomic_t cpus_in_crash; +static int time_to_dump; #define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ @@ -67,21 +68,27 @@ static int handle_fault(struct pt_regs *regs) void crash_ipi_callback(struct pt_regs *regs) { + static cpumask_t cpus_state_saved = CPU_MASK_NONE; + int cpu = smp_processor_id(); if (!cpu_online(cpu)) return; hard_irq_disable(); - if (!cpumask_test_cpu(cpu, &cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { crash_save_cpu(regs, cpu); - cpumask_set_cpu(cpu, &cpus_in_crash); + cpumask_set_cpu(cpu, &cpus_state_saved); + } + + atomic_inc(&cpus_in_crash); + smp_mb__after_atomic_inc(); /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. */ - while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) + while (!time_to_dump) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -115,19 +122,18 @@ again: * respond. */ msecs = IPI_TIMEOUT; - while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) + while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) mdelay(1); /* Would it be better to replace the trap vector here? */ - if (cpumask_weight(&cpus_in_crash) >= ncpus) { + if (atomic_read(&cpus_in_crash) >= ncpus) { printk(KERN_EMERG "IPI complete\n"); return; } printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", - ncpus - cpumask_weight(&cpus_in_crash)); + ncpus - atomic_read(&cpus_in_crash)); /* * If we have a panic timeout set then we can't wait indefinitely @@ -155,10 +161,10 @@ again: * crash code again. We need to reset cpus_in_crash so we * wait for everyone to do this. */ - cpus_in_crash = CPU_MASK_NONE; + atomic_set(&cpus_in_crash, 0); smp_mb(); - while (cpumask_weight(&cpus_in_crash) < ncpus) + while (atomic_read(&cpus_in_crash) < ncpus) cpu_relax(); } @@ -316,7 +322,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * such that another IPI will not be sent. */ crashing_cpu = smp_processor_id(); - crash_save_cpu(regs, crashing_cpu); /* * If we came in via system reset, wait a while for the secondary * CPUs to enter. */ if (TRAP(regs) == 0x100) mdelay(PRIMARY_TIMEOUT); crash_kexec_prepare_cpus(crashing_cpu); + + crash_save_cpu(regs, crashing_cpu); + + time_to_dump = 1; + crash_kexec_wait_realmode(crashing_cpu); machine_kexec_mask_interrupts(); -- cgit v1.2.2
From 816cb49a4baf0e4b27730bcc31e5d35a1eadd283 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 29 Nov 2011 15:38:50 +0000 Subject: powerpc/ps3: Fix hcall lv1_get_version_info The lv1_get_version_info hcall takes 2, not 1, output arguments. Adjust the lv1 hcall table and all calls.
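A minimal sketch of a fixed caller, matching the two-output signature given in the Usage line below; the helper and its message are illustrative, not code from the patch:

#include <linux/types.h>
#include <linux/printk.h>
#include <asm/lv1call.h>

static void ps3_print_firmware_version(void)
{
	u64 version, vendor;

	/* Both output arguments must now be supplied, even if only one is used. */
	if (lv1_get_version_info(&version, &vendor) == 0)
		pr_info("ps3 firmware version %llx, vendor %llx\n",
			version, vendor);
}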
Usage: int lv1_get_version_info(u64 *version_number, u64 *vendor_id) Signed-off-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2ff4f5e59620..701d4aceb4f4 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -186,8 +186,8 @@ notrace void arch_local_irq_restore(unsigned long en) * Any HV call will have this side effect. */ if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp; - lv1_get_version_info(&tmp); + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); } __hard_irq_enable(); -- cgit v1.2.2 From 33392640424ff775e7d82eab4a51af7b8cc9384d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 4 Dec 2011 13:13:58 +0000 Subject: powerpc/pseries: Increase minimum RMO size from 64MB to 256MB The minimum RMO size field in ibm,client-architecture is currently ignored, but a future firmware version will rectify that. Since we always get at least 128MB of RMO right now, asking for 64MB is likely to result in boot failures. We should bump it to at least 128MB, but considering all the boot issues we have on 128MB RMO boxes and all new machines have virtual RMO, we may as well set our minimum to 256MB. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index f4f9f2f14322..df47316f1aee 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -742,7 +742,7 @@ static unsigned char ibm_architecture_vec[] = { W(0xffffffff), /* virt_base */ W(0xffffffff), /* virt_size */ W(0xffffffff), /* load_base */ - W(64), /* 64MB min RMA */ + W(256), /* 256MB min RMA */ W(0xffffffff), /* full client load */ 0, /* min RMA percentage of total RAM */ 48, /* max log_2(hash table size) */ -- cgit v1.2.2 From cba313da5c8ddbe6bba74c9f2b8f6d9e0bc0e723 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 5 Dec 2011 19:35:32 +0000 Subject: powerpc/powernv: Fix problems in onlining CPUs At present, on the powernv platform, if you off-line a CPU that was online, and then try to on-line it again, the kernel generates a warning message "OPAL Error -1 starting CPU n". Furthermore, if the CPU is a secondary thread that was used by KVM while it was off-line, the CPU fails to come online. The first problem is fixed by only calling OPAL to start the CPU the first time it is on-lined, as indicated by the cpu_start field of its PACA being zero. The second problem is fixed by restoring the cpu_start field to 1 instead of 0 when using the CPU within KVM. 
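A rough sketch of the first fix, gating the OPAL start call on the PACA cpu_start field; this helper and its flow are a simplified illustration (assuming opal_start_cpu() reports OPAL_SUCCESS on success), not the actual platform code:

#include <linux/printk.h>
#include <asm/paca.h>
#include <asm/opal.h>
#include <asm/smp.h>

static int kick_cpu_sketch(int nr, unsigned long start_here)
{
	long rc;

	/* A CPU that was on-lined before is already spinning in the kernel;
	 * it only needs cpu_start set again, not another OPAL start call. */
	if (!paca[nr].cpu_start) {
		rc = opal_start_cpu(get_hard_smp_processor_id(nr), start_here);
		if (rc != OPAL_SUCCESS) {
			pr_warn("OPAL Error %ld starting CPU %d\n", rc, nr);
			return -1;
		}
	}

	/* Release the CPU into the secondary startup path */
	paca[nr].cpu_start = 1;
	smp_mb();
	return 0;
}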
Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cf9c69b9189c..d4be7bb3dbdf 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -65,7 +65,7 @@ BEGIN_FTR_SECTION lbz r0,PACAPROCSTART(r13) cmpwi r0,0x80 bne 1f - li r0,0 + li r0,1 stb r0,PACAPROCSTART(r13) b kvm_start_guest 1: -- cgit v1.2.2 From 2fde6d20bb75b53f1ead383b4713f95d0d6d9f59 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 5 Dec 2011 19:47:26 +0000 Subject: powerpc: Provide a way for KVM to indicate that NV GPR values are lost This fixes a problem where a CPU thread coming out of nap mode can think it has valid values in the nonvolatile GPRs (r14 - r31) as saved away in power7_idle, but in fact the values have been trashed because the thread was used for KVM in the mean time. The result is that the thread crashes because code that called power7_idle (e.g., pnv_smp_cpu_kill_self()) goes to use values in registers that have been trashed. The bit field in SRR1 that tells whether state was lost only reflects the most recent nap, which may not have been the nap instruction in power7_idle. So we need an extra PACA field to indicate that state has been lost even if SRR1 indicates that the most recent nap didn't lose state. We clear this field when saving the state in power7_idle, we set it to a non-zero value when we use the thread for KVM, and we test it in power7_wakeup_noloss. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/idle_power7.S | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7c5324f1ec9c..04caee7d9bc1 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -208,6 +208,7 @@ int main(void) DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); + DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); #endif /* CONFIG_PPC64 */ /* RTAS */ diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 3a70845a51c7..fcdff198da4b 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -54,6 +54,7 @@ _GLOBAL(power7_idle) li r0,0 stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ stb r0,PACAHARDIRQEN(r13) + stb r0,PACA_NAPSTATELOST(r13) /* Continue saving state */ SAVE_GPR(2, r1) @@ -86,6 +87,9 @@ _GLOBAL(power7_wakeup_loss) rfid _GLOBAL(power7_wakeup_noloss) + lbz r0,PACA_NAPSTATELOST(r13) + cmpwi r0,0 + bne .power7_wakeup_loss ld r1,PACAR1(r13) ld r4,_MSR(r1) ld r5,_NIP(r1) -- cgit v1.2.2 From fe091c208a40299fba40e62292a610fb91e44b4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:07 -0800 Subject: memblock: Kill memblock_init() memblock_init() initializes arrays for regions and memblock itself; however, all these can be done with struct initializers and memblock_init() can be removed. This patch kills memblock_init() and initializes memblock with struct initializer. The only difference is that the first dummy entries don't have .nid set to MAX_NUMNODES initially. 
This doesn't cause any behavior difference. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu Cc: Russell King Cc: Michal Simek Cc: Paul Mundt Cc: "David S. Miller" Cc: Guan Xuetao Cc: "H. Peter Anvin" --- arch/powerpc/kernel/prom.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fa1235b0503b..a7ee83e6eb17 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -733,8 +733,6 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); /* Scan memory nodes and rebuild MEMBLOCKs */ - memblock_init(); - of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); -- cgit v1.2.2
From 6fbef13c4feaf0c5576e2315f4d2999c4b670c88 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:07 -0800 Subject: powerpc: Cleanup memblock usage * early_init_devtree(): Total memory size is aligned to PAGE_SIZE; however, alignment isn't enforced if memory_limit is explicitly specified. Simplify the logic and always apply PAGE_SIZE alignment. * MMU_init(): the memblock regions array is truncated by directly modifying memblock.memory.cnt. This is incomplete (the reserved array is not truncated) and unnecessarily low level, hindering further memblock improvements. Use memblock_enforce_memory_limit() instead. * wii_memory_fixups(): Unnecessarily low-level direct manipulation of memblock regions. The same result can be achieved using properly abstracted operations. Reimplement using the memblock API. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- arch/powerpc/kernel/prom.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a7ee83e6eb17..28500d4f29d9 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -754,17 +754,12 @@ void __init early_init_devtree(void *params) early_reserve_mem(); phyp_dump_reserve_mem(); - limit = memory_limit; - if (! limit) { - phys_addr_t memsize; - - /* Ensure that total memory size is page-aligned, because - * otherwise mark_bootmem() gets upset. */ - memblock_analyze(); - memsize = memblock_phys_mem_size(); - if ((memsize & PAGE_MASK) != memsize) - limit = memsize & PAGE_MASK; - } + /* + * Ensure that total memory size is page-aligned, because otherwise + * mark_bootmem() gets upset. + */ + memblock_analyze(); + limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); memblock_analyze(); -- cgit v1.2.2
From 1aadc0560f46530f8a0f11055285b876a8a31770 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:08 -0800 Subject: memblock: s/memblock_analyze()/memblock_allow_resize()/ and update users The only function of memblock_analyze() is now allowing resize of memblock region arrays. Rename it to memblock_allow_resize() and update its users. * The following users remain the same other than renaming. arm/mm/init.c::arm_memblock_init() microblaze/kernel/prom.c::early_init_devtree() powerpc/kernel/prom.c::early_init_devtree() openrisc/kernel/prom.c::early_init_devtree() sh/mm/init.c::paging_init() sparc/mm/init_64.c::paging_init() unicore32/mm/init.c::uc32_memblock_init() * In the following users, analyze was used to update total size which is no longer necessary.
powerpc/kernel/machine_kexec.c::reserve_crashkernel() powerpc/kernel/prom.c::early_init_devtree() powerpc/mm/init_32.c::MMU_init() powerpc/mm/tlb_nohash.c::__early_init_mmu() powerpc/platforms/ps3/mm.c::ps3_mm_add_memory() powerpc/platforms/embedded6xx/wii.c::wii_memory_fixups() sh/kernel/machine_kexec.c::reserve_crashkernel() * x86/kernel/e820.c::memblock_x86_fill() was directly setting memblock_can_resize before populating memblock and calling analyze afterwards. Call memblock_allow_resize() before starting to populate. memblock_can_resize is now static inside memblock.c. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu Cc: Russell King Cc: Michal Simek Cc: Paul Mundt Cc: "David S. Miller" Cc: Guan Xuetao Cc: "H. Peter Anvin" --- arch/powerpc/kernel/machine_kexec.c | 3 --- arch/powerpc/kernel/prom.c | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 9ce1672afb59..a2158a395d96 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -107,9 +107,6 @@ void __init reserve_crashkernel(void) unsigned long long crash_size, crash_base; int ret; - /* this is necessary because of memblock_phys_mem_size() */ - memblock_analyze(); - /* use common parsing */ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 28500d4f29d9..abe405dab34d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -758,11 +758,10 @@ void __init early_init_devtree(void *params) * Ensure that total memory size is page-aligned, because otherwise * mark_bootmem() gets upset. */ - memblock_analyze(); limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); - memblock_analyze(); + memblock_allow_resize(); memblock_dump_all(); DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); -- cgit v1.2.2
From df777bd39a266637d1765d48043493489418e75b Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Wed, 30 Nov 2011 21:39:23 +0000 Subject: powerpc/476fpe: Add 476fpe SoC code Based on original work by David 'Shaggy' Kleikamp.
Signed-off-by: Tony Breeds Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cputable.c | 14 ++++++++++++++ arch/powerpc/kernel/head_44x.S | 2 ++ 2 files changed, 16 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index ce59693835c7..81db9e2a8a20 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1843,6 +1843,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, + { /* 476fpe */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff50000, + .cpu_name = "476fpe", + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, { /* 476 iss */ .pvr_mask = 0xffff0000, .pvr_value = 0x00050000, diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b725dab0f88a..bb7a9c7a4c05 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -732,6 +732,8 @@ _GLOBAL(init_cpu_state) /* We use the PVR to differenciate 44x cores from 476 */ mfspr r3,SPRN_PVR srwi r3,r3,16 + cmplwi cr0,r3,PVR_476FPE@h + beq head_start_47x cmplwi cr0,r3,PVR_476@h beq head_start_47x cmplwi cr0,r3,PVR_476_ISS@h -- cgit v1.2.2
From 280f06774afedf849f0b34248ed6aff57d0f6908 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Oct 2011 18:22:06 +0200 Subject: nohz: Separate out irq exit and idle loop dyntick logic The tick_nohz_stop_sched_tick() function, which tries to delay the next timer tick as long as possible, can be called from two places: - From the idle loop to start the dyntick idle mode - From interrupt exit if we have interrupted the dyntick idle mode, so that we reprogram the next tick event in case the irq changed some internal state that requires this action. There are only a few minor differences between the two, handled by that function and driven by the ts->inidle cpu variable and the inidle parameter. Together they guarantee that we only update the dyntick mode on irq exit if we actually interrupted the dyntick idle mode, and that we enter the RCU extended quiescent state from idle loop entry only. Split this function into: - tick_nohz_idle_enter(), which sets ts->inidle to 1, enters dynticks idle mode unconditionally if it can, and enters into RCU extended quiescent state. - tick_nohz_irq_exit() which only updates the dynticks idle mode when ts->inidle is set (i.e. if tick_nohz_idle_enter() has been called). To maintain symmetry, tick_nohz_restart_sched_tick() has been renamed into tick_nohz_idle_exit(). This simplifies the code and micro-optimizes the irq exit path (no need for local_irq_save there). This also prepares for the split between dynticks and rcu extended quiescent state logics. We'll need this split to further fix illegal uses of RCU in extended quiescent states in the idle loop. Signed-off-by: Frederic Weisbecker Cc: Mike Frysinger Cc: Guan Xuetao Cc: David Miller Cc: Chris Metcalf Cc: Hans-Christian Egtvedt Cc: Ralf Baechle Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Russell King Cc: Paul Mackerras Cc: Heiko Carstens Cc: Paul Mundt Signed-off-by: Paul E.
McKenney Reviewed-by: Josh Triplett --- arch/powerpc/kernel/idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 39a2baa6ad58..878572f70ac5 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -56,7 +56,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +93,7 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.2
From 2bbb6817c0ac1b5f2a68d720f364f98eeb1ac4fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Oct 2011 16:01:00 +0200 Subject: nohz: Allow rcu extended quiescent state handling separately from tick stop It is assumed that rcu won't be used once we switch to tickless mode and until we restart the tick. However, this is not always true, as on x86-64 where we dereference the idle notifiers after the tick is stopped. To prepare for fixing this, add two new APIs: tick_nohz_idle_enter_norcu() and tick_nohz_idle_exit_norcu(). If no use of RCU is made in the idle loop between the tick_nohz_idle_enter() and tick_nohz_idle_exit() calls, the arch must instead call the new *_norcu() version such that the arch doesn't need to call rcu_idle_enter() and rcu_idle_exit(). Otherwise the arch must call tick_nohz_idle_enter() and tick_nohz_idle_exit() and also call explicitly: - rcu_idle_enter() after its last use of RCU before the CPU is put to sleep. - rcu_idle_exit() before the first use of RCU after the CPU is woken up. Signed-off-by: Frederic Weisbecker Cc: Mike Frysinger Cc: Guan Xuetao Cc: David Miller Cc: Chris Metcalf Cc: Hans-Christian Egtvedt Cc: Ralf Baechle Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Russell King Cc: Paul Mackerras Cc: Heiko Carstens Cc: Paul Mundt Signed-off-by: Paul E. McKenney --- arch/powerpc/kernel/idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 878572f70ac5..2e782a36d8f2 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -56,7 +56,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +93,7 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.2
From a7b152d5342c06e81ab0cf7d18345f69fa7e56b5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 13 Oct 2011 19:05:12 -0700 Subject: powerpc: Tell RCU about idle after hcall tracing The PowerPC pSeries platform (CONFIG_PPC_PSERIES=y) enables hypervisor-call tracing for CONFIG_TRACEPOINTS=y kernels. One of the hypervisor calls that is traced is the H_CEDE call in the idle loop that tells the hypervisor that this OS instance no longer needs the current CPU.
However, tracing uses RCU, so this combination of kernel configuration variables needs to avoid telling RCU about the current CPU's idleness until after the H_CEDE-entry tracing completes, and must tell RCU that the current CPU is no longer idle before the H_CEDE-exit tracing starts. In all other cases, it suffices to inform RCU of CPU idleness upon idle-loop entry and exit. This commit makes the required adjustments. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- arch/powerpc/kernel/idle.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 2e782a36d8f2..3cd73d1fc427 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -46,6 +46,12 @@ static int __init powersave_off(char *arg) } __setup("powersave=off", powersave_off); +#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS) +static const bool idle_uses_rcu = 1; +#else +static const bool idle_uses_rcu; +#endif + /* * The body of the idle task. */ @@ -56,7 +62,10 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_idle_enter_norcu(); + if (idle_uses_rcu) + tick_nohz_idle_enter(); + else + tick_nohz_idle_enter_norcu(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +102,10 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_idle_exit_norcu(); + if (idle_uses_rcu) + tick_nohz_idle_exit(); + else + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.2
From 1268fbc746ea1cd279886a740dcbad4ba5232225 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 17 Nov 2011 18:48:14 +0100 Subject: nohz: Remove tick_nohz_idle_enter_norcu() / tick_nohz_idle_exit_norcu() Those two APIs were provided to optimize the calls of tick_nohz_idle_enter() and rcu_idle_enter() into a single irq-disabled section. This way no interrupt happening in-between would needlessly process any RCU job. Now we are talking about an optimization for which benefits have yet to be measured. Let's start simple and completely decouple the idle rcu and dyntick idle logics. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Reviewed-by: Josh Triplett Signed-off-by: Paul E.
McKenney --- arch/powerpc/kernel/idle.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 3cd73d1fc427..9c3cd490b1bd 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -62,10 +62,10 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - if (idle_uses_rcu) - tick_nohz_idle_enter(); - else - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + if (!idle_uses_rcu) + rcu_idle_enter(); + while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -102,10 +102,9 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - if (idle_uses_rcu) - tick_nohz_idle_exit(); - else - tick_nohz_idle_exit_norcu(); + if (!idle_uses_rcu) + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.2
From 9f5072d4f63f28d30d343573830ac6c85fc0deff Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Fri, 9 Dec 2011 11:35:08 +0000 Subject: powerpc: Fix wrong divisor in usecs_to_cputime Commit d57af9b (taskstats: use real microsecond granularity for CPU times) renamed msecs_to_cputime to usecs_to_cputime, but failed to update all numbers on the way. This causes nonsensical cpu idle/iowait values to be displayed in /proc/stat (the only user of usecs_to_cputime so far). This also renames __cputime_msec_factor to __cputime_usec_factor, adapting its value and using it directly in cputime_to_usecs instead of doing two multiplications. Signed-off-by: Andreas Schwab Acked-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 9754743db8b9..567dd7c3ac2a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -158,13 +158,13 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Factors for converting from cputime_t (timebase ticks) to - * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds). + * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). * These are all stored as 0.64 fixed-point binary fractions. */ u64 __cputime_jiffies_factor; EXPORT_SYMBOL(__cputime_jiffies_factor); -u64 __cputime_msec_factor; -EXPORT_SYMBOL(__cputime_msec_factor); +u64 __cputime_usec_factor; +EXPORT_SYMBOL(__cputime_usec_factor); u64 __cputime_sec_factor; EXPORT_SYMBOL(__cputime_sec_factor); u64 __cputime_clockt_factor; @@ -182,8 +182,8 @@ static void calc_cputime_factors(void) div128_by_32(HZ, 0, tb_ticks_per_sec, &res); __cputime_jiffies_factor = res.result_low; - div128_by_32(1000, 0, tb_ticks_per_sec, &res); - __cputime_msec_factor = res.result_low; + div128_by_32(1000000, 0, tb_ticks_per_sec, &res); + __cputime_usec_factor = res.result_low; div128_by_32(1, 0, tb_ticks_per_sec, &res); __cputime_sec_factor = res.result_low; div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); -- cgit v1.2.2
From 64968f60e73d7b3f9fca1ca5cd985d75b2cbca44 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 13 Dec 2011 17:54:13 +0000 Subject: powerpc: Only use initrd_end as the limit for alloc_bottom if it's inside the RMO. As kernels and initrds get bigger, boot loaders and possibly kexec-tools will need to place the initrd outside the RMO. When this happens we end up with no lowmem and the boot doesn't get very far.
Only use initrd_end as the limit for alloc_bottom if it's inside the RMO. Signed-off-by: Paul Mackerras Signed-off-by: Tony Breeds Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index df47316f1aee..32b5b05082ea 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1224,14 +1224,6 @@ static void __init prom_init_mem(void) RELOC(alloc_bottom) = PAGE_ALIGN((unsigned long)&RELOC(_end) + 0x4000); - /* Check if we have an initrd after the kernel, if we do move our bottom - * point to after it - */ - if (RELOC(prom_initrd_start)) { - if (RELOC(prom_initrd_end) > RELOC(alloc_bottom)) - RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); - } - /* * If prom_memory_limit is set we reduce the upper limits *except* for * alloc_top_high. This must be the real top of RAM so we can put @@ -1269,6 +1261,15 @@ static void __init prom_init_mem(void) RELOC(alloc_top) = RELOC(rmo_top); RELOC(alloc_top_high) = RELOC(ram_top); + /* + * Check if we have an initrd after the kernel but still inside + * the RMO. If we do move our bottom point to after it. + */ + if (RELOC(prom_initrd_start) && + RELOC(prom_initrd_start) < RELOC(rmo_top) && + RELOC(prom_initrd_end) > RELOC(alloc_bottom)) + RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); + prom_printf("memory layout at init:\n"); prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom)); -- cgit v1.2.2
From 3f53638c805f75989f4b4be07efcfd173cdd5e2d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Dec 2011 13:55:11 +0000 Subject: powerpc: Fix old bug in prom_init setting of the color We have an array of 16 entries and a loop of 32 iterations... oops. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 32b5b05082ea..55b4080a821e 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2080,7 +2080,7 @@ static void __init prom_check_displays(void) /* Setup a usable color table when the appropriate * method is available. Should update this to set-colors */ clut = RELOC(default_colors); - for (i = 0; i < 32; i++, clut += 3) + for (i = 0; i < 16; i++, clut += 3) if (prom_set_color(ih, i, clut[0], clut[1], clut[2]) != 0) break; -- cgit v1.2.2
From 0f890c8d205e47f7cb0d381ffba582a170fd4f72 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Wed, 14 Dec 2011 22:57:15 +0000 Subject: powerpc: Rename mapping based RELOCATABLE to DYNAMIC_MEMSTART for BookE The current implementation of CONFIG_RELOCATABLE in BookE is based on mapping the page aligned kernel load address to KERNELBASE. This approach, however, is not enough for platforms where the TLB page size is large (e.g., 256M on 44x). So we are renaming the RELOCATABLE used currently in BookE to DYNAMIC_MEMSTART to reflect the actual method. CONFIG_RELOCATABLE for PPC32 (BookE), based on processing of the dynamic relocations, will be introduced later in the patch series. This change would allow the use of the old method of RELOCATABLE for platforms which can afford to enforce the page alignment (platforms with smaller TLB size).
Changes since v3: * Introduced a new config, NONSTATIC_KERNEL, to denote a kernel which is either RELOCATABLE or DYNAMIC_MEMSTART (suggested by Josh Boyer) Suggested-by: Scott Wood Tested-by: Scott Wood Signed-off-by: Suzuki K. Poulose Cc: Scott Wood Cc: Kumar Gala Cc: Josh Boyer Cc: Benjamin Herrenschmidt Cc: linux ppc dev Signed-off-by: Josh Boyer --- arch/powerpc/kernel/crash_dump.c | 4 ++-- arch/powerpc/kernel/head_44x.S | 4 +++- arch/powerpc/kernel/head_fsl_booke.S | 2 +- arch/powerpc/kernel/machine_kexec.c | 2 +- arch/powerpc/kernel/prom_init.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 424afb6b8fba..b3ba5163eae2 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -28,7 +28,7 @@ #define DBG(fmt...) #endif -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL void __init reserve_kdump_trampoline(void) { memblock_reserve(0, KDUMP_RESERVE_LIMIT); @@ -67,7 +67,7 @@ void __init setup_kdump_trampoline(void) DBG(" <- setup_kdump_trampoline()\n"); } -#endif /* CONFIG_RELOCATABLE */ +#endif /* CONFIG_NONSTATIC_KERNEL */ static int __init parse_savemaxmem(char *p) { diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index bb7a9c7a4c05..d7a1debda10b 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -86,8 +86,10 @@ _ENTRY(_start); bl early_init -#ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_DYNAMIC_MEMSTART /* + * Mapping based, page aligned dynamic kernel loading. + * * r25 will contain RPN/ERPN for the start address of memory * * Add the difference between KERNELBASE and PAGE_OFFSET to the diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 9f5d210ddf3f..d5d78c4ceef6 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -197,7 +197,7 @@ _ENTRY(__early_start) bl early_init -#ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_DYNAMIC_MEMSTART lis r3,kernstart_addr@ha la r3,kernstart_addr@l(r3) #ifdef CONFIG_PHYS_64BIT diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 9ce1672afb59..ec50bb965538 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -128,7 +128,7 @@ void __init reserve_crashkernel(void) crash_size = resource_size(&crashk_res); -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL if (crashk_res.start != KDUMP_KERNELBASE) printk("Crash kernel location must be 0x%x\n", KDUMP_KERNELBASE); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 55b4080a821e..eca626ea3f23 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2845,7 +2845,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, RELOC(of_platform) = prom_find_machine_type(); prom_printf("Detected machine type: %x\n", RELOC(of_platform)); -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL /* Bail if this is a kdump kernel. */ if (PHYSICAL_START > 0) prom_panic("Error: You can't boot a kdump kernel from OF!\n"); -- cgit v1.2.2
From 239132454583d474932d8835f87a244f6f1bff9e Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Wed, 14 Dec 2011 22:57:57 +0000 Subject: powerpc/44x: Enable DYNAMIC_MEMSTART for 440x DYNAMIC_MEMSTART (the old RELOCATABLE) was restricted to the PPC_47x variants of 44x.
This patch enables DYNAMIC_MEMSTART for 440x based chipsets. Signed-off-by: Suzuki K. Poulose Cc: Josh Boyer Cc: Kumar Gala Cc: Benjamin Herrenschmidt Cc: linux ppc dev Signed-off-by: Josh Boyer --- arch/powerpc/kernel/head_44x.S | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index d7a1debda10b..0878bf5d8a68 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -804,12 +804,24 @@ skpinv: addi r4,r4,1 /* Increment */ /* * Configure and load pinned entry into TLB slot 63. */ +#ifdef CONFIG_DYNAMIC_MEMSTART + + /* Read the XLAT entry for our current mapping */ + tlbre r25,r23,PPC44x_TLB_XLAT + + lis r3,KERNELBASE@h + ori r3,r3,KERNELBASE@l + + /* Use our current RPN entry */ + mr r4,r25 +#else lis r3,PAGE_OFFSET@h ori r3,r3,PAGE_OFFSET@l /* Kernel is at the base of RAM */ li r4, 0 /* Load the kernel physical address */ +#endif /* Load the kernel PID = 0 */ li r0,0 -- cgit v1.2.2 From 9c5f7d39a86316cd13baf973c90ed27f9f1cc979 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Wed, 14 Dec 2011 22:58:12 +0000 Subject: powerpc: Process dynamic relocations for kernel The following patch implements the dynamic relocation processing for the PPC32 kernel. relocate() accepts the target virtual address and relocates the kernel image to that address. Currently the following relocation types are handled: R_PPC_RELATIVE R_PPC_ADDR16_LO R_PPC_ADDR16_HI R_PPC_ADDR16_HA The last 3 relocation types in the above list depend on the value of a symbol whose index is encoded in the relocation entry. Hence we need the symbol table for processing such relocations. Note: The GNU ld for ppc32 produces buggy relocations for relocation types that depend on symbols. The value of the symbols with STB_LOCAL scope should be assumed to be zero. - Alan Modra Signed-off-by: Suzuki K. Poulose Signed-off-by: Josh Poimboeuf Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Alan Modra Cc: Kumar Gala Cc: linuxppc-dev Signed-off-by: Josh Boyer --- arch/powerpc/kernel/Makefile | 2 + arch/powerpc/kernel/reloc_32.S | 208 ++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 8 +- 3 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/reloc_32.S (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ce4f7f179117..ee728e433aa2 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -85,6 +85,8 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds +obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o + obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S new file mode 100644 index 000000000000..ef46ba6e094f --- /dev/null +++ b/arch/powerpc/kernel/reloc_32.S @@ -0,0 +1,208 @@ +/* + * Code to process dynamic relocations for PPC32. + * + * Copyrights (C) IBM Corporation, 2011. + * Author: Suzuki Poulose + * + * - Based on ppc64 code - reloc_64.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
+ */ + +#include + +/* Dynamic section table entry tags */ +DT_RELA = 7 /* Tag for Elf32_Rela section */ +DT_RELASZ = 8 /* Size of the Rela relocs */ +DT_RELAENT = 9 /* Size of one Rela reloc entry */ + +STN_UNDEF = 0 /* Undefined symbol index */ +STB_LOCAL = 0 /* Local binding for the symbol */ + +R_PPC_ADDR16_LO = 4 /* Lower half of (S+A) */ +R_PPC_ADDR16_HI = 5 /* Upper half of (S+A) */ +R_PPC_ADDR16_HA = 6 /* High Adjusted (S+A) */ +R_PPC_RELATIVE = 22 + +/* + * r3 = desired final address + */ + +_GLOBAL(relocate) + + mflr r0 /* Save our LR */ + bl 0f /* Find our current runtime address */ +0: mflr r12 /* Make it accessible */ + mtlr r0 + + lwz r11, (p_dyn - 0b)(r12) + add r11, r11, r12 /* runtime address of .dynamic section */ + lwz r9, (p_rela - 0b)(r12) + add r9, r9, r12 /* runtime address of .rela.dyn section */ + lwz r10, (p_st - 0b)(r12) + add r10, r10, r12 /* runtime address of _stext section */ + lwz r13, (p_sym - 0b)(r12) + add r13, r13, r12 /* runtime address of .dynsym section */ + + /* + * Scan the dynamic section for RELA, RELASZ entries + */ + li r6, 0 + li r7, 0 + li r8, 0 +1: lwz r5, 0(r11) /* ELF_Dyn.d_tag */ + cmpwi r5, 0 /* End of ELF_Dyn[] */ + beq eodyn + cmpwi r5, DT_RELA + bne relasz + lwz r7, 4(r11) /* r7 = rela.link */ + b skip +relasz: + cmpwi r5, DT_RELASZ + bne relaent + lwz r8, 4(r11) /* r8 = Total Rela relocs size */ + b skip +relaent: + cmpwi r5, DT_RELAENT + bne skip + lwz r6, 4(r11) /* r6 = Size of one Rela reloc */ +skip: + addi r11, r11, 8 + b 1b +eodyn: /* End of Dyn Table scan */ + + /* Check if we have found all the entries */ + cmpwi r7, 0 + beq done + cmpwi r8, 0 + beq done + cmpwi r6, 0 + beq done + + + /* + * Work out the current offset from the link time address of .rela + * section. + * cur_offset[r7] = rela.run[r9] - rela.link [r7] + * _stext.link[r12] = _stext.run[r10] - cur_offset[r7] + * final_offset[r3] = _stext.final[r3] - _stext.link[r12] + */ + subf r7, r7, r9 /* cur_offset */ + subf r12, r7, r10 + subf r3, r12, r3 /* final_offset */ + + subf r8, r6, r8 /* relaz -= relaent */ + /* + * Scan through the .rela table and process each entry + * r9 - points to the current .rela table entry + * r13 - points to the symbol table + */ + + /* + * Check if we have a relocation based on symbol + * r5 will hold the value of the symbol. + */ +applyrela: + lwz r4, 4(r9) /* r4 = rela.r_info */ + srwi r5, r4, 8 /* ELF32_R_SYM(r_info) */ + cmpwi r5, STN_UNDEF /* sym == STN_UNDEF ? */ + beq get_type /* value = 0 */ + /* Find the value of the symbol at index(r5) */ + slwi r5, r5, 4 /* r5 = r5 * sizeof(Elf32_Sym) */ + add r12, r13, r5 /* r12 = &__dyn_sym[Index] */ + + /* + * GNU ld has a bug, where dynamic relocs based on + * STB_LOCAL symbols, the value should be assumed + * to be zero. - Alan Modra + */ + /* XXX: Do we need to check if we are using GNU ld ? 
*/ + lbz r5, 12(r12) /* r5 = dyn_sym[Index].st_info */ + extrwi r5, r5, 4, 24 /* r5 = ELF32_ST_BIND(r5) */ + cmpwi r5, STB_LOCAL /* st_value = 0, ld bug */ + beq get_type /* We have r5 = 0 */ + lwz r5, 4(r12) /* r5 = __dyn_sym[Index].st_value */ + +get_type: + /* Load the relocation type to r4 */ + extrwi r4, r4, 8, 24 /* r4 = ELF32_R_TYPE(r_info) = ((char*)r4)[3] */ + + /* R_PPC_RELATIVE */ + cmpwi r4, R_PPC_RELATIVE + bne hi16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 /* final addend */ + stwx r0, r4, r7 /* memory[r4+r7]) = (u32)r0 */ + b nxtrela /* continue */ + + /* R_PPC_ADDR16_HI */ +hi16: + cmpwi r4, R_PPC_ADDR16_HI + bne ha16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */ + b store_half + + /* R_PPC_ADDR16_HA */ +ha16: + cmpwi r4, R_PPC_ADDR16_HA + bne lo16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r5, r0, 1, 16 /* Extract bit 16 */ + extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */ + add r0, r0, r5 /* Add it to r0 */ + b store_half + + /* R_PPC_ADDR16_LO */ +lo16: + cmpwi r4, R_PPC_ADDR16_LO + bne nxtrela + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r0, r0, 16, 16 /* r0 &= 0xffff */ + /* Fall through to */ + + /* Store half word */ +store_half: + sthx r0, r4, r7 /* memory[r4+r7] = (u16)r0 */ + +nxtrela: + /* + * We have to flush the modified instructions to the + * main storage from the d-cache. And also, invalidate the + * cached instructions in i-cache which has been modified. + * + * We delay the sync / isync operation till the end, since + * we won't be executing the modified instructions until + * we return from here. + */ + dcbst r4,r7 + sync /* Ensure the data is flushed before icbi */ + icbi r4,r7 + cmpwi r8, 0 /* relasz = 0 ? */ + ble done + add r9, r9, r6 /* move to next entry in the .rela table */ + subf r8, r6, r8 /* relasz -= relaent */ + b applyrela + +done: + sync /* Wait for the flush to finish */ + isync /* Discard prefetched instructions */ + blr + +p_dyn: .long __dynamic_start - 0b +p_rela: .long __rela_dyn_start - 0b +p_sym: .long __dynamic_symtab - 0b +p_st: .long _stext - 0b diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 920276c0f6a1..710a54005dfb 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -170,7 +170,13 @@ SECTIONS } #ifdef CONFIG_RELOCATABLE . = ALIGN(8); - .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { *(.dynsym) } + .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) + { +#ifdef CONFIG_RELOCATABLE_PPC32 + __dynamic_symtab = .; +#endif + *(.dynsym) + } .dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) } .dynamic : AT(ADDR(.dynamic) - LOAD_OFFSET) { -- cgit v1.2.2 From 26ecb6c44bb33afc62905ba01b636dde70fc2dc6 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Wed, 14 Dec 2011 22:59:24 +0000 Subject: powerpc/44x: Enable CONFIG_RELOCATABLE for PPC44x The following patch adds relocatable kernel support - based on processing of dynamic relocations - for PPC44x kernel. We find the runtime address of _stext and relocate ourselves based on the following calculation. virtual_base = ALIGN(KERNELBASE,256M) + MODULO(_stext.run,256M) relocate() is called with the Effective Virtual Base Address (as shown below):

              | Phys. Addr| Virt. Addr |
 Page (256M)  |------------------------|
 Boundary     |           |            |
              |           |            |
              |           |            |
 Kernel Load  |___________|_ __ _ _ _ _|<- Effective
 Addr(_stext) |           |     ^      |   Virt. Base Addr
              |           |     |      |
              |           |     |      |
              |           |reloc_offset|
              |           |     |      |
              |           |     |      |
              |           |______v_____|<-(KERNELBASE)%256M
              |           |            |
              |           |            |
              |           |            |
 Page (256M)  |-----------|------------|
 Boundary     |           |            |

The virt_phys_offset is updated accordingly, i.e., virt_phys_offset = effective kernel virt base - kernstart_addr. I have tested the patches on 440x platforms only. However this should work fine for PPC_47x also, as we only depend on the runtime address and the current TLB XLAT entry for the startup code, which is available in r25. I don't have access to a 47x board yet. So, it would be great if somebody could test this on 47x. Signed-off-by: Suzuki K. Poulose Cc: Benjamin Herrenschmidt Cc: Kumar Gala Cc: Tony Breeds Cc: Josh Boyer Cc: linuxppc-dev Signed-off-by: Josh Boyer --- arch/powerpc/kernel/head_44x.S | 95 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 0878bf5d8a68..7dd2981bcc50 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -64,6 +64,35 @@ _ENTRY(_start); mr r31,r3 /* save device tree ptr */ li r24,0 /* CPU number */ +#ifdef CONFIG_RELOCATABLE +/* + * Relocate ourselves to the current runtime address. + * This is called only by the Boot CPU. + * "relocate" is called with our current runtime virutal + * address. + * r21 will be loaded with the physical runtime address of _stext + */ + bl 0f /* Get our runtime address */ +0: mflr r21 /* Make it accessible */ + addis r21,r21,(_stext - 0b)@ha + addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */ + + /* + * We have the runtime (virutal) address of our base. + * We calculate our shift of offset from a 256M page. + * We could map the 256M page we belong to at PAGE_OFFSET and + * get going from there. + */ + lis r4,KERNELBASE@h + ori r4,r4,KERNELBASE@l + rlwinm r6,r21,0,4,31 /* r6 = PHYS_START % 256M */ + rlwinm r5,r4,0,4,31 /* r5 = KERNELBASE % 256M */ + subf r3,r5,r6 /* r3 = r6 - r5 */ + add r3,r4,r3 /* Required Virutal Address */ + + bl relocate +#endif + bl init_cpu_state /* @@ -86,7 +115,64 @@ _ENTRY(_start); bl early_init -#ifdef CONFIG_DYNAMIC_MEMSTART +#ifdef CONFIG_RELOCATABLE + /* + * Relocatable kernel support based on processing of dynamic + * relocation entries. + * + * r25 will contain RPN/ERPN for the start address of memory + * r21 will contain the current offset of _stext + */ + lis r3,kernstart_addr@ha + la r3,kernstart_addr@l(r3) + + /* + * Compute the kernstart_addr. + * kernstart_addr => (r6,r8) + * kernstart_addr & ~0xfffffff => (r6,r7) + */ + rlwinm r6,r25,0,28,31 /* ERPN.
Bits 32-35 of Address */ + rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */ + rlwinm r8,r21,0,4,31 /* r8 = (_stext & 0xfffffff) */ + or r8,r7,r8 /* Compute the lower 32bit of kernstart_addr */ + + /* Store kernstart_addr */ + stw r6,0(r3) /* higher 32bit */ + stw r8,4(r3) /* lower 32bit */ + + /* + * Compute the virt_phys_offset : + * virt_phys_offset = stext.run - kernstart_addr + * + * stext.run = (KERNELBASE & ~0xfffffff) + (kernstart_addr & 0xfffffff) + * When we relocate, we have : + * + * (kernstart_addr & 0xfffffff) = (stext.run & 0xfffffff) + * + * hence: + * virt_phys_offset = (KERNELBASE & ~0xfffffff) - (kernstart_addr & ~0xfffffff) + * + */ + + /* KERNELBASE&~0xfffffff => (r4,r5) */ + li r4, 0 /* higer 32bit */ + lis r5,KERNELBASE@h + rlwinm r5,r5,0,0,3 /* Align to 256M, lower 32bit */ + + /* + * 64bit subtraction. + */ + subfc r5,r7,r5 + subfe r4,r6,r4 + + /* Store virt_phys_offset */ + lis r3,virt_phys_offset@ha + la r3,virt_phys_offset@l(r3) + + stw r4,0(r3) + stw r5,4(r3) + +#elif defined(CONFIG_DYNAMIC_MEMSTART) /* * Mapping based, page aligned dynamic kernel loading. * @@ -804,7 +890,12 @@ skpinv: addi r4,r4,1 /* Increment */ /* * Configure and load pinned entry into TLB slot 63. */ -#ifdef CONFIG_DYNAMIC_MEMSTART +#ifdef CONFIG_NONSTATIC_KERNEL + /* + * In case of a NONSTATIC_KERNEL we reuse the TLB XLAT + * entries of the initial mapping set by the boot loader. + * The XLAT entry is stored in r25 + */ /* Read the XLAT entry for our current mapping */ tlbre r25,r23,PPC44x_TLB_XLAT -- cgit v1.2.2 From 90363ddf0a1a4dccfbb8d0c10b8f488bc7fa69f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 18 Dec 2011 00:34:42 +0100 Subject: PM: Drop generic_subsys_pm_ops Since the PM core is now going to execute driver callbacks directly if the corresponding subsystem callbacks are not present, forward-only subsystem callbacks (i.e. callbacks that only execute the corresponding driver callbacks) are not necessary any more. Thus it is possible to remove generic_subsys_pm_ops, because the only callback in there that is not forward-only, .runtime_idle, is not really used by the only user of generic_subsys_pm_ops, which is vio_bus_type. However, the generic callback routines themselves cannot be removed from generic_ops.c, because they are used individually by a number of subsystems. Signed-off-by: Rafael J. Wysocki --- arch/powerpc/kernel/vio.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index f65af61996bd..8b086299ba25 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1406,7 +1406,6 @@ static struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, - .pm = GENERIC_SUBSYS_PM_OPS, }; /** -- cgit v1.2.2 From 8a25a2fd126c621f44f3aeaef80d51f00fc11639 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 21 Dec 2011 14:29:42 -0800 Subject: cpu: convert 'cpu' and 'machinecheck' sysdev_class to a regular subsystem This moves the 'cpu sysdev_class' over to a regular 'cpu' subsystem and converts the devices to regular devices. The sysdev drivers are implemented as subsystem interfaces now. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. Userspace relies on events and generic sysfs subsystem infrastructure from sysdev devices, which are made available with this conversion.
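The shape of that conversion, reduced to a single made-up attribute "foo" (a sketch, not part of the patch; error handling elided): a sysdev attribute and its struct sys_device callback become a regular device attribute whose callback takes struct device.

	/*
	 * Old style, removed by this series:
	 *
	 *	static ssize_t show_foo(struct sys_device *dev,
	 *				struct sysdev_attribute *attr, char *buf);
	 *	static SYSDEV_ATTR(foo, 0644, show_foo, NULL);
	 *
	 * New style, as used throughout the diff below:
	 */
	static ssize_t show_foo(struct device *dev,
				struct device_attribute *attr, char *buf)
	{
		struct cpu *cpu = container_of(dev, struct cpu, dev);

		/* the cpu's id now lives in the embedded struct device */
		return sprintf(buf, "%d\n", cpu->dev.id);
	}
	static DEVICE_ATTR(foo, 0644, show_foo, NULL);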
Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Tony Luck Cc: Fenghua Yu Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: "David S. Miller" Cc: Chris Metcalf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: Tigran Aivazian Cc: Len Brown Cc: Zhang Rui Cc: Dave Jones Cc: Peter Zijlstra Cc: Russell King Cc: Andrew Morton Cc: Arjan van de Ven Cc: "Rafael J. Wysocki" Cc: "Srivatsa S. Bhat" Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/cacheinfo.c | 10 +- arch/powerpc/kernel/smp.c | 2 +- arch/powerpc/kernel/sysfs.c | 257 ++++++++++++++++++++-------------------- 3 files changed, 134 insertions(+), 135 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index a3c684b4c862..92c6b008dd2b 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -451,15 +451,15 @@ out: static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id) { struct cache_dir *cache_dir; - struct sys_device *sysdev; + struct device *dev; struct kobject *kobj = NULL; - sysdev = get_cpu_sysdev(cpu_id); - WARN_ONCE(!sysdev, "no sysdev for CPU %i\n", cpu_id); - if (!sysdev) + dev = get_cpu_device(cpu_id); + WARN_ONCE(!dev, "no dev for CPU %i\n", cpu_id); + if (!dev) goto err; - kobj = kobject_create_and_add("cache", &sysdev->kobj); + kobj = kobject_create_and_add("cache", &dev->kobj); if (!kobj) goto err; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 25ddbfc7dd36..da08240353fa 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ce035c1905f0..f396ef27916b 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -37,12 +37,12 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); /* Time in microseconds we delay before sleeping in the idle loop */ DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; -static ssize_t store_smt_snooze_delay(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_smt_snooze_delay(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); ssize_t ret; long snooze; @@ -50,21 +50,21 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, if (ret != 1) return -EINVAL; - per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; + per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; return count; } -static ssize_t show_smt_snooze_delay(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t show_smt_snooze_delay(struct device *dev, + struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); + return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); } -static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, +static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, store_smt_snooze_delay); static int __init setup_smt_snooze_delay(char *str) @@ -117,25 +117,25 @@ static 
void write_##NAME(void *val) \ ppc_enable_pmcs(); \ mtspr(ADDRESS, *(unsigned long *)val); \ } \ -static ssize_t show_##NAME(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ +static ssize_t show_##NAME(struct device *dev, \ + struct device_attribute *attr, \ char *buf) \ { \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ unsigned long val; \ - smp_call_function_single(cpu->sysdev.id, read_##NAME, &val, 1); \ + smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \ return sprintf(buf, "%lx\n", val); \ } \ static ssize_t __used \ - store_##NAME(struct sys_device *dev, struct sysdev_attribute *attr, \ + store_##NAME(struct device *dev, struct device_attribute *attr, \ const char *buf, size_t count) \ { \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ unsigned long val; \ int ret = sscanf(buf, "%lx", &val); \ if (ret != 1) \ return -EINVAL; \ - smp_call_function_single(cpu->sysdev.id, write_##NAME, &val, 1); \ + smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \ return count; \ } @@ -178,22 +178,22 @@ SYSFS_PMCSETUP(purr, SPRN_PURR); SYSFS_PMCSETUP(spurr, SPRN_SPURR); SYSFS_PMCSETUP(dscr, SPRN_DSCR); -static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); -static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL); -static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr); -static SYSDEV_ATTR(purr, 0600, show_purr, store_purr); +static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +static DEVICE_ATTR(spurr, 0600, show_spurr, NULL); +static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); +static DEVICE_ATTR(purr, 0600, show_purr, store_purr); unsigned long dscr_default = 0; EXPORT_SYMBOL(dscr_default); -static ssize_t show_dscr_default(struct sysdev_class *class, - struct sysdev_class_attribute *attr, char *buf) +static ssize_t show_dscr_default(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%lx\n", dscr_default); } -static ssize_t __used store_dscr_default(struct sysdev_class *class, - struct sysdev_class_attribute *attr, const char *buf, +static ssize_t __used store_dscr_default(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -207,15 +207,14 @@ static ssize_t __used store_dscr_default(struct sysdev_class *class, return count; } -static SYSDEV_CLASS_ATTR(dscr_default, 0600, +static DEVICE_ATTR(dscr_default, 0600, show_dscr_default, store_dscr_default); static void sysfs_create_dscr_default(void) { int err = 0; if (cpu_has_feature(CPU_FTR_DSCR)) - err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, - &attr_dscr_default.attr); + err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); } #endif /* CONFIG_PPC64 */ @@ -259,72 +258,72 @@ SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3); #endif /* HAS_PPC_PMC_PA6T */ #ifdef HAS_PPC_PMC_IBM -static struct sysdev_attribute ibm_common_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), +static struct device_attribute ibm_common_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), }; #endif /* HAS_PPC_PMC_G4 */ #ifdef HAS_PPC_PMC_G4 -static struct sysdev_attribute g4_common_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), - _SYSDEV_ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2), +static struct 
device_attribute g4_common_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), + __ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2), }; #endif /* HAS_PPC_PMC_G4 */ -static struct sysdev_attribute classic_pmc_attrs[] = { - _SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1), - _SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2), - _SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3), - _SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4), - _SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5), - _SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6), +static struct device_attribute classic_pmc_attrs[] = { + __ATTR(pmc1, 0600, show_pmc1, store_pmc1), + __ATTR(pmc2, 0600, show_pmc2, store_pmc2), + __ATTR(pmc3, 0600, show_pmc3, store_pmc3), + __ATTR(pmc4, 0600, show_pmc4, store_pmc4), + __ATTR(pmc5, 0600, show_pmc5, store_pmc5), + __ATTR(pmc6, 0600, show_pmc6, store_pmc6), #ifdef CONFIG_PPC64 - _SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7), - _SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8), + __ATTR(pmc7, 0600, show_pmc7, store_pmc7), + __ATTR(pmc8, 0600, show_pmc8, store_pmc8), #endif }; #ifdef HAS_PPC_PMC_PA6T -static struct sysdev_attribute pa6t_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), - _SYSDEV_ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0), - _SYSDEV_ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1), - _SYSDEV_ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2), - _SYSDEV_ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), - _SYSDEV_ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), - _SYSDEV_ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), +static struct device_attribute pa6t_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), + __ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0), + __ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1), + __ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2), + __ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), + __ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), + __ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), #ifdef CONFIG_DEBUG_KERNEL - _SYSDEV_ATTR(hid0, 0600, show_hid0, store_hid0), - _SYSDEV_ATTR(hid1, 0600, show_hid1, store_hid1), - _SYSDEV_ATTR(hid4, 0600, show_hid4, store_hid4), - _SYSDEV_ATTR(hid5, 0600, show_hid5, store_hid5), - _SYSDEV_ATTR(ima0, 0600, show_ima0, store_ima0), - _SYSDEV_ATTR(ima1, 0600, show_ima1, store_ima1), - _SYSDEV_ATTR(ima2, 0600, show_ima2, store_ima2), - _SYSDEV_ATTR(ima3, 0600, show_ima3, store_ima3), - _SYSDEV_ATTR(ima4, 0600, show_ima4, store_ima4), - _SYSDEV_ATTR(ima5, 0600, show_ima5, store_ima5), - _SYSDEV_ATTR(ima6, 0600, show_ima6, store_ima6), - _SYSDEV_ATTR(ima7, 0600, show_ima7, store_ima7), - _SYSDEV_ATTR(ima8, 0600, show_ima8, store_ima8), - _SYSDEV_ATTR(ima9, 0600, show_ima9, store_ima9), - _SYSDEV_ATTR(imaat, 0600, show_imaat, store_imaat), - _SYSDEV_ATTR(btcr, 0600, show_btcr, store_btcr), - _SYSDEV_ATTR(pccr, 0600, show_pccr, store_pccr), - _SYSDEV_ATTR(rpccr, 0600, show_rpccr, store_rpccr), - _SYSDEV_ATTR(der, 0600, show_der, store_der), - _SYSDEV_ATTR(mer, 0600, show_mer, store_mer), - _SYSDEV_ATTR(ber, 0600, show_ber, store_ber), - _SYSDEV_ATTR(ier, 0600, show_ier, store_ier), - _SYSDEV_ATTR(sier, 0600, show_sier, store_sier), - _SYSDEV_ATTR(siar, 0600, show_siar, store_siar), - _SYSDEV_ATTR(tsr0, 0600, show_tsr0, store_tsr0), - _SYSDEV_ATTR(tsr1, 0600, show_tsr1, store_tsr1), - _SYSDEV_ATTR(tsr2, 0600, show_tsr2, store_tsr2), - 
_SYSDEV_ATTR(tsr3, 0600, show_tsr3, store_tsr3), + __ATTR(hid0, 0600, show_hid0, store_hid0), + __ATTR(hid1, 0600, show_hid1, store_hid1), + __ATTR(hid4, 0600, show_hid4, store_hid4), + __ATTR(hid5, 0600, show_hid5, store_hid5), + __ATTR(ima0, 0600, show_ima0, store_ima0), + __ATTR(ima1, 0600, show_ima1, store_ima1), + __ATTR(ima2, 0600, show_ima2, store_ima2), + __ATTR(ima3, 0600, show_ima3, store_ima3), + __ATTR(ima4, 0600, show_ima4, store_ima4), + __ATTR(ima5, 0600, show_ima5, store_ima5), + __ATTR(ima6, 0600, show_ima6, store_ima6), + __ATTR(ima7, 0600, show_ima7, store_ima7), + __ATTR(ima8, 0600, show_ima8, store_ima8), + __ATTR(ima9, 0600, show_ima9, store_ima9), + __ATTR(imaat, 0600, show_imaat, store_imaat), + __ATTR(btcr, 0600, show_btcr, store_btcr), + __ATTR(pccr, 0600, show_pccr, store_pccr), + __ATTR(rpccr, 0600, show_rpccr, store_rpccr), + __ATTR(der, 0600, show_der, store_der), + __ATTR(mer, 0600, show_mer, store_mer), + __ATTR(ber, 0600, show_ber, store_ber), + __ATTR(ier, 0600, show_ier, store_ier), + __ATTR(sier, 0600, show_sier, store_sier), + __ATTR(siar, 0600, show_siar, store_siar), + __ATTR(tsr0, 0600, show_tsr0, store_tsr0), + __ATTR(tsr1, 0600, show_tsr1, store_tsr1), + __ATTR(tsr2, 0600, show_tsr2, store_tsr2), + __ATTR(tsr3, 0600, show_tsr3, store_tsr3), #endif /* CONFIG_DEBUG_KERNEL */ }; #endif /* HAS_PPC_PMC_PA6T */ @@ -333,14 +332,14 @@ static struct sysdev_attribute pa6t_attrs[] = { static void __cpuinit register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - struct sysdev_attribute *attrs, *pmc_attrs; + struct device *s = &c->dev; + struct device_attribute *attrs, *pmc_attrs; int i, nattrs; #ifdef CONFIG_PPC64 if (!firmware_has_feature(FW_FEATURE_ISERIES) && cpu_has_feature(CPU_FTR_SMT)) - sysdev_create_file(s, &attr_smt_snooze_delay); + device_create_file(s, &dev_attr_smt_snooze_delay); #endif /* PMC stuff */ @@ -348,14 +347,14 @@ static void __cpuinit register_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -363,7 +362,7 @@ static void __cpuinit register_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; #endif /* HAS_PPC_PMC_PA6T */ @@ -374,24 +373,24 @@ static void __cpuinit register_cpu_online(unsigned int cpu) } for (i = 0; i < nattrs; i++) - sysdev_create_file(s, &attrs[i]); + device_create_file(s, &attrs[i]); if (pmc_attrs) for (i = 0; i < cur_cpu_spec->num_pmcs; i++) - sysdev_create_file(s, &pmc_attrs[i]); + device_create_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_create_file(s, &attr_mmcra); + device_create_file(s, &dev_attr_mmcra); if (cpu_has_feature(CPU_FTR_PURR)) - sysdev_create_file(s, &attr_purr); + device_create_file(s, &dev_attr_purr); if (cpu_has_feature(CPU_FTR_SPURR)) - sysdev_create_file(s, 
&attr_spurr); + device_create_file(s, &dev_attr_spurr); if (cpu_has_feature(CPU_FTR_DSCR)) - sysdev_create_file(s, &attr_dscr); + device_create_file(s, &dev_attr_dscr); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_online(cpu); @@ -401,8 +400,8 @@ static void __cpuinit register_cpu_online(unsigned int cpu) static void unregister_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - struct sysdev_attribute *attrs, *pmc_attrs; + struct device *s = &c->dev; + struct device_attribute *attrs, *pmc_attrs; int i, nattrs; BUG_ON(!c->hotpluggable); @@ -410,7 +409,7 @@ static void unregister_cpu_online(unsigned int cpu) #ifdef CONFIG_PPC64 if (!firmware_has_feature(FW_FEATURE_ISERIES) && cpu_has_feature(CPU_FTR_SMT)) - sysdev_remove_file(s, &attr_smt_snooze_delay); + device_remove_file(s, &dev_attr_smt_snooze_delay); #endif /* PMC stuff */ @@ -418,14 +417,14 @@ static void unregister_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -433,7 +432,7 @@ static void unregister_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; #endif /* HAS_PPC_PMC_PA6T */ @@ -444,24 +443,24 @@ static void unregister_cpu_online(unsigned int cpu) } for (i = 0; i < nattrs; i++) - sysdev_remove_file(s, &attrs[i]); + device_remove_file(s, &attrs[i]); if (pmc_attrs) for (i = 0; i < cur_cpu_spec->num_pmcs; i++) - sysdev_remove_file(s, &pmc_attrs[i]); + device_remove_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_remove_file(s, &attr_mmcra); + device_remove_file(s, &dev_attr_mmcra); if (cpu_has_feature(CPU_FTR_PURR)) - sysdev_remove_file(s, &attr_purr); + device_remove_file(s, &dev_attr_purr); if (cpu_has_feature(CPU_FTR_SPURR)) - sysdev_remove_file(s, &attr_spurr); + device_remove_file(s, &dev_attr_spurr); if (cpu_has_feature(CPU_FTR_DSCR)) - sysdev_remove_file(s, &attr_dscr); + device_remove_file(s, &dev_attr_dscr); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_offline(cpu); @@ -513,70 +512,70 @@ static struct notifier_block __cpuinitdata sysfs_cpu_nb = { static DEFINE_MUTEX(cpu_mutex); -int cpu_add_sysdev_attr(struct sysdev_attribute *attr) +int cpu_add_dev_attr(struct device_attribute *attr) { int cpu; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev_create_file(get_cpu_sysdev(cpu), attr); + device_create_file(get_cpu_device(cpu), attr); } mutex_unlock(&cpu_mutex); return 0; } -EXPORT_SYMBOL_GPL(cpu_add_sysdev_attr); +EXPORT_SYMBOL_GPL(cpu_add_dev_attr); -int cpu_add_sysdev_attr_group(struct attribute_group *attrs) +int cpu_add_dev_attr_group(struct attribute_group *attrs) { int cpu; - struct sys_device *sysdev; + struct device *dev; int ret; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - ret = sysfs_create_group(&sysdev->kobj, attrs); + dev = 
get_cpu_device(cpu); + ret = sysfs_create_group(&dev->kobj, attrs); WARN_ON(ret != 0); } mutex_unlock(&cpu_mutex); return 0; } -EXPORT_SYMBOL_GPL(cpu_add_sysdev_attr_group); +EXPORT_SYMBOL_GPL(cpu_add_dev_attr_group); -void cpu_remove_sysdev_attr(struct sysdev_attribute *attr) +void cpu_remove_dev_attr(struct device_attribute *attr) { int cpu; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev_remove_file(get_cpu_sysdev(cpu), attr); + device_remove_file(get_cpu_device(cpu), attr); } mutex_unlock(&cpu_mutex); } -EXPORT_SYMBOL_GPL(cpu_remove_sysdev_attr); +EXPORT_SYMBOL_GPL(cpu_remove_dev_attr); -void cpu_remove_sysdev_attr_group(struct attribute_group *attrs) +void cpu_remove_dev_attr_group(struct attribute_group *attrs) { int cpu; - struct sys_device *sysdev; + struct device *dev; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - sysfs_remove_group(&sysdev->kobj, attrs); + dev = get_cpu_device(cpu); + sysfs_remove_group(&dev->kobj, attrs); } mutex_unlock(&cpu_mutex); } -EXPORT_SYMBOL_GPL(cpu_remove_sysdev_attr_group); +EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group); /* NUMA stuff */ @@ -590,7 +589,7 @@ static void register_nodes(void) register_one_node(i); } -int sysfs_add_device_to_node(struct sys_device *dev, int nid) +int sysfs_add_device_to_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; return sysfs_create_link(&node->sysdev.kobj, &dev->kobj, @@ -598,7 +597,7 @@ int sysfs_add_device_to_node(struct sys_device *dev, int nid) } EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); -void sysfs_remove_device_from_node(struct sys_device *dev, int nid) +void sysfs_remove_device_from_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; sysfs_remove_link(&node->sysdev.kobj, kobject_name(&dev->kobj)); @@ -614,14 +613,14 @@ static void register_nodes(void) #endif /* Only valid if CPU is present. */ -static ssize_t show_physical_id(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_physical_id(struct device *dev, + struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); - return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id)); + return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->dev.id)); } -static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL); +static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL); static int __init topology_init(void) { @@ -646,7 +645,7 @@ static int __init topology_init(void) if (cpu_online(cpu) || c->hotpluggable) { register_cpu(c, cpu); - sysdev_create_file(&c->sysdev, &attr_physical_id); + device_create_file(&c->dev, &dev_attr_physical_id); } if (cpu_online(cpu)) -- cgit v1.2.2 From 10fbcf4c6cb122005cdf36fc24d7683da92c7a27 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 21 Dec 2011 14:48:43 -0800 Subject: convert 'memory' sysdev_class to a regular subsystem This moves the 'memory sysdev_class' over to a regular 'memory' subsystem and converts the devices to regular devices. The sysdev drivers are implemented as subsystem interfaces now. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. 
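The NUMA node links in the diff that follows use the same substitution; reduced to one function, the change amounts to reaching the kobject through the node's embedded struct device (a sketch restating the hunk below, not new code):

	int sysfs_add_device_to_node(struct device *dev, int nid)
	{
		struct node *node = &node_devices[nid];

		/* was &node->sysdev.kobj before the conversion */
		return sysfs_create_link(&node->dev.kobj, &dev->kobj,
					 kobject_name(&dev->kobj));
	}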
Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index f396ef27916b..5e7c1655f13a 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -592,7 +592,7 @@ static void register_nodes(void) int sysfs_add_device_to_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; - return sysfs_create_link(&node->sysdev.kobj, &dev->kobj, + return sysfs_create_link(&node->dev.kobj, &dev->kobj, kobject_name(&dev->kobj)); } EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); @@ -600,7 +600,7 @@ EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); void sysfs_remove_device_from_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; - sysfs_remove_link(&node->sysdev.kobj, kobject_name(&dev->kobj)); + sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj)); } EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node); -- cgit v1.2.2 From d161a13f974c72fd7ff0069d39a3ae57cb5694ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jul 2011 03:36:29 -0400 Subject: switch procfs to umode_t use: both proc_dir_entry ->mode and populating functions Signed-off-by: Al Viro --- arch/powerpc/kernel/lparcfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 84daabe2fcba..578f35f18723 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -783,7 +783,7 @@ static const struct file_operations lparcfg_fops = { static int __init lparcfg_init(void) { struct proc_dir_entry *ent; - mode_t mode = S_IRUSR | S_IRGRP | S_IROTH; + umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; /* Allow writing if we have FW_FEATURE_SPLPAR */ if (firmware_has_feature(FW_FEATURE_SPLPAR) && -- cgit v1.2.2
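For context on the umode_t switch in the last hunk, a sketch of the resulting lparcfg_init() flow (the feature check is abbreviated; the real function tests more conditions than shown here):

	umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;	/* 0444 */
	struct proc_dir_entry *ent;

	/* allow writes when the platform supports shared-processor LPAR */
	if (firmware_has_feature(FW_FEATURE_SPLPAR))
		mode |= S_IWUSR;

	ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops);
	if (!ent)
		printk(KERN_ERR "Failed to create powerpc/lparcfg\n");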