From e63075a3c9377536d085bc013cd3fe6323162449 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:01 -0700 Subject: memblock: Introduce default allocation limit and use it to replace explicit ones This introduce memblock.current_limit which is used to limit allocations from memblock_alloc() or memblock_alloc_base(..., MEMBLOCK_ALLOC_ACCESSIBLE). The old MEMBLOCK_ALLOC_ANYWHERE changes value from 0 to ~(u64)0 and can still be used with memblock_alloc_base() to allocate really anywhere. It is -no-longer- cropped to MEMBLOCK_REAL_LIMIT which disappears. Note to archs: I'm leaving the default limit to MEMBLOCK_ALLOC_ANYWHERE. I strongly recommend that you ensure that you set an appropriate limit during boot in order to guarantee that an memblock_alloc() at any time results in something that is accessible with a simple __va(). The reason is that a subsequent patch will introduce the ability for the array to resize itself by reallocating itself. The MEMBLOCK core will honor the current limit when performing those allocations. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom.c | 20 +++++++++++++++++++- arch/powerpc/kernel/setup_32.c | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fed9bf6187d1..3aec0b980f6a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -98,7 +98,7 @@ static void __init move_device_tree(void) if ((memory_limit && (start + size) > memory_limit) || overlaps_crashkernel(start, size)) { - p = __va(memblock_alloc_base(size, PAGE_SIZE, memblock.rmo_size)); + p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); initial_boot_params = (struct boot_param_header *)p; DBG("Moved device tree to 0x%p\n", p); @@ -655,6 +655,21 @@ static void __init phyp_dump_reserve_mem(void) static inline void __init phyp_dump_reserve_mem(void) {} #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ +static void set_boot_memory_limit(void) +{ +#ifdef CONFIG_PPC32 + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + memblock_set_current_limit(0x01000000); + /* 8xx can only access 8MB at the moment */ + else if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) + memblock_set_current_limit(0x00800000); + else + memblock_set_current_limit(0x10000000); +#else + memblock_set_current_limit(memblock.rmo_size); +#endif +} void __init early_init_devtree(void *params) { @@ -683,6 +698,7 @@ void __init early_init_devtree(void *params) /* Scan memory nodes and rebuild MEMBLOCKs */ memblock_init(); + of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); @@ -718,6 +734,8 @@ void __init early_init_devtree(void *params) DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); + set_boot_memory_limit(); + /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ move_device_tree(); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a10ffc85ada7..b7eb1ded3b5f 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -246,7 +246,7 @@ static void __init irqstack_early_init(void) unsigned int i; /* interrupt stacks must be in lowmem, we get that for free on ppc32 - * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ + * as the memblock is limited to lowmem by default */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); -- cgit v1.2.2 From cd3db0c4ca3d237e7ad20f7107216e575705d2b0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:02 -0700 Subject: memblock: Remove rmo_size, burry it in arch/powerpc where it belongs The RMA (RMO is a misnomer) is a concept specific to ppc64 (in fact server ppc64 though I hijack it on embedded ppc64 for similar purposes) and represents the area of memory that can be accessed in real mode (aka with MMU off), or on embedded, from the exception vectors (which is bolted in the TLB) which pretty much boils down to the same thing. We take that out of the generic MEMBLOCK data structure and move it into arch/powerpc where it belongs, renaming it to "RMA" while at it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_40x.S | 6 +----- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/kernel/prom.c | 29 ++++++++--------------------- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- 5 files changed, 12 insertions(+), 29 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a90625f9b485..8278e8bad5a0 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -923,11 +923,7 @@ initial_mmu: mtspr SPRN_PID,r0 sync - /* Configure and load two entries into TLB slots 62 and 63. - * In case we are pinning TLBs, these are reserved in by the - * other TLB functions. If not reserving, then it doesn't - * matter where they are loaded. - */ + /* Configure and load one entry into TLB slots 63 */ clrrwi r4,r4,10 /* Mask off the real page number */ ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 139a773853f4..b9ffd7deeed7 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -117,7 +117,7 @@ void __init allocate_pacas(void) * the first segment. On iSeries they must be within the area mapped * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. */ - limit = min(0x10000000ULL, memblock.rmo_size); + limit = min(0x10000000ULL, ppc64_rma_size); if (firmware_has_feature(FW_FEATURE_ISERIES)) limit = min(limit, HvPagesToMap * HVPAGESIZE); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 3aec0b980f6a..c3c6a8857544 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -66,6 +66,7 @@ int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; +u64 ppc64_rma_size; #endif static int __init early_parse_mem(char *p) @@ -492,7 +493,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, void __init early_init_dt_add_memory_arch(u64 base, u64 size) { -#if defined(CONFIG_PPC64) +#ifdef CONFIG_PPC64 if (iommu_is_off) { if (base >= 0x80000000ul) return; @@ -501,9 +502,13 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } #endif - memblock_add(base, size); - + /* First MEMBLOCK added, do some special initializations */ + if (memstart_addr == ~(phys_addr_t)0) + setup_initial_memory_limit(base, size); memstart_addr = min((u64)memstart_addr, base); + + /* Add the chunk to the MEMBLOCK list */ + memblock_add(base, size); } u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) @@ -655,22 +660,6 @@ static void __init phyp_dump_reserve_mem(void) static inline void __init phyp_dump_reserve_mem(void) {} #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ -static void set_boot_memory_limit(void) -{ -#ifdef CONFIG_PPC32 - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - memblock_set_current_limit(0x01000000); - /* 8xx can only access 8MB at the moment */ - else if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) - memblock_set_current_limit(0x00800000); - else - memblock_set_current_limit(0x10000000); -#else - memblock_set_current_limit(memblock.rmo_size); -#endif -} - void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -734,8 +723,6 @@ void __init early_init_devtree(void *params) DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); - set_boot_memory_limit(); - /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ move_device_tree(); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d0516dbee762..1662777be5dd 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -934,7 +934,7 @@ void __init rtas_initialize(void) */ #ifdef CONFIG_PPC64 if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { - rtas_region = min(memblock.rmo_size, RTAS_INSTANTIATE_MAX); + rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d135f93cb0f6..4360944b60f0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -487,7 +487,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(slb0_limit(), memblock.rmo_size); + limit = min(slb0_limit(), ppc64_rma_size); for_each_possible_cpu(i) { unsigned long sp; -- cgit v1.2.2 From 70791ce9ba68a5921c9905ef05d23f62a90bc10c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 29 Jun 2010 19:34:05 +0200 Subject: perf: Generalize callchain_store() callchain_store() is the same on every archs, inline it in perf_event.h and rename it to perf_callchain_store() to avoid any collision. This removes repetitive code. Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/powerpc/kernel/perf_callchain.c | 40 +++++++++++++----------------------- 1 file changed, 14 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 95ad9dad298e..a286c2e5a3ea 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -23,18 +23,6 @@ #include "ppc32.h" #endif -/* - * Store another value in a callchain_entry. - */ -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - unsigned int nr = entry->nr; - - if (nr < PERF_MAX_STACK_DEPTH) { - entry->ip[nr] = ip; - entry->nr = nr + 1; - } -} /* * Is sp valid as the address of the next kernel stack frame after prev_sp? @@ -69,8 +57,8 @@ static void perf_callchain_kernel(struct pt_regs *regs, lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->nip); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return; @@ -89,7 +77,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; level = 0; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); } else { if (level == 0) @@ -111,7 +99,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ++level; } - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); if (!valid_next_sp(next_sp, sp)) return; sp = next_sp; @@ -246,8 +234,8 @@ static void perf_callchain_user_64(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); for (;;) { fp = (unsigned long __user *) sp; @@ -276,14 +264,14 @@ static void perf_callchain_user_64(struct pt_regs *regs, read_user_stack_64(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } @@ -447,8 +435,8 @@ static void perf_callchain_user_32(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned int __user *) (unsigned long) sp; @@ -470,14 +458,14 @@ static void perf_callchain_user_32(struct pt_regs *regs, read_user_stack_32(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } -- cgit v1.2.2 From 56962b4449af34070bb1994621ef4f0265eed4d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Jun 2010 23:03:51 +0200 Subject: perf: Generalize some arch callchain code - Most archs use one callchain buffer per cpu, except x86 that needs to deal with NMIs. Provide a default perf_callchain_buffer() implementation that x86 overrides. - Centralize all the kernel/user regs handling and invoke new arch handlers from there: perf_callchain_user() / perf_callchain_kernel() That avoid all the user_mode(), current->mm checks and so... - Invert some parameters in perf_callchain_*() helpers: entry to the left, regs to the right, following the traditional (dst, src). Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/powerpc/kernel/perf_callchain.c | 49 +++++++++++------------------------- 1 file changed, 14 insertions(+), 35 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index a286c2e5a3ea..f7a85ede8407 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -46,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -221,8 +221,8 @@ static int sane_signal_64_frame(unsigned long sp) puc == (unsigned long) &sf->uc; } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -303,8 +303,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) return __get_user_inatomic(*ret, ptr); } -static inline void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -423,8 +423,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, return mctx->mc_gregs; } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned int sp, next_sp; unsigned int next_ip; @@ -471,32 +471,11 @@ static void perf_callchain_user_32(struct pt_regs *regs, } } -/* - * Since we can't get PMU interrupts inside a PMU interrupt handler, - * we don't need separate irq and nmi entries here. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - if (current_is_64bit()) - perf_callchain_user_64(regs, entry); - else - perf_callchain_user_32(regs, entry); - } - - return entry; + if (current_is_64bit()) + perf_callchain_user_64(entry, regs); + else + perf_callchain_user_32(entry, regs); } -- cgit v1.2.2 From f72c1a931e311bb7780fee19e41a89ac42cab50e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 1 Jul 2010 02:31:21 +0200 Subject: perf: Factorize callchain context handling Store the kernel and user contexts from the generic layer instead of archs, this gathers some repetitive code. Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/powerpc/kernel/perf_callchain.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index f7a85ede8407..d05ae4204bbf 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -57,7 +57,6 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) @@ -234,7 +233,6 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_store(entry, next_ip); for (;;) { @@ -435,7 +433,6 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { -- cgit v1.2.2 From f89451fbd2b9f28f5ff156154989599ec062354b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 11 Aug 2010 01:40:27 +0000 Subject: powerpc: Feature nop out reservation clear when stcx checks address The POWER architecture does not require stcx to check that it is operating on the same address as the larx. This means it is possible for an an exception handler to execute a larx, get a reservation, decide not to do the stcx and then return back with an active reservation. If the interrupted code was in the middle of a larx/stcx sequence the stcx could incorrectly succeed. All recent POWER CPUs check the address before letting the stcx succeed so we can create a CPU feature and nop it out. As Ben suggested, we can only do this in our syscall path because there is a remote possibility some kernel code gets interrupted by an exception that ends up operating on the same cacheline. Thanks to Paul Mackerras and Derek Williams for the idea. To test this I used a very simple null syscall (actually getppid) testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c I tested against 2.6.35-git10 with the following changes against the pseries_defconfig: CONFIG_VIRT_CPU_ACCOUNTING=n CONFIG_AUDIT=n CONFIG_PPC_4K_PAGES=n CONFIG_PPC_64K_PAGES=y CONFIG_FORCE_MAX_ZONEORDER=9 CONFIG_PPC_SUBPAGE_PROT=n CONFIG_FUNCTION_TRACER=n CONFIG_FUNCTION_GRAPH_TRACER=n CONFIG_IRQSOFF_TRACER=n CONFIG_STACK_TRACER=n to remove the overhead of virtual CPU accounting, syscall auditing and the ftrace mcount tracers. 64kB pages were enabled to minimise TLB misses. POWER6: +8.2% POWER7: +7.0% Another suggestion was to use a larx to something in the L1 instead of a stcx. This was almost as fast as removing the larx on POWER6, but only 3.5% faster on POWER7. We can use this to speed up the reservation clear in our exception exit code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/entry_64.S | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 42e9d908914a..4d5fa12ca6e8 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -202,7 +202,9 @@ syscall_exit: bge- syscall_error syscall_error_cont: ld r7,_NIP(r1) +BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. r6,r8,MSR_PR ld r4,_LINK(r1) /* @@ -419,6 +421,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync #endif /* CONFIG_SMP */ + /* + * If we optimise away the clear of the reservation in system + * calls because we know the CPU tracks the address of the + * reservation, then we need to clear it here to cover the + * case that the kernel context switch path has no larx + * instructions. + */ +BEGIN_FTR_SECTION + ldarx r6,0,r1 +END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS) + addi r6,r4,-THREAD /* Convert THREAD to 'current' */ std r6,PACACURRENT(r13) /* Set new 'current' */ @@ -576,7 +589,16 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) andi. r0,r3,MSR_RI beq- unrecov_restore + /* + * Clear the reservation. If we know the CPU tracks the address of + * the reservation then we can potentially save some cycles and use + * a larx. On POWER6 and POWER7 this is significantly faster. + */ +BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r4,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) /* * Clear RI before restoring r13. If we are returning to -- cgit v1.2.2 From e1f0ece113fe028593b6869fe191a991322c5d85 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 10 Aug 2010 20:02:05 +0000 Subject: powerpc: Move arch_sd_sibling_asym_packing() to smp.c Simple cleanup by moving arch_sd_sibling_asym_packing from process.c to smp.c to save an #ifdef CONFIG_SMP No functionality change. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 11 ----------- arch/powerpc/kernel/smp.c | 9 +++++++++ 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b1c648a36b03..37bc8ff16cac 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1298,14 +1298,3 @@ unsigned long randomize_et_dyn(unsigned long base) return ret; } - -#ifdef CONFIG_SMP -int arch_sd_sibling_asym_packing(void) -{ - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); - return SD_ASYM_PACKING; - } - return 0; -} -#endif diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0008bc58e826..9019f0f1bb5e 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -580,6 +580,15 @@ void __init smp_cpus_done(unsigned int max_cpus) dump_numa_cpu_topology(); } +int arch_sd_sibling_asym_packing(void) +{ + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { + printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); + return SD_ASYM_PACKING; + } + return 0; +} + #ifdef CONFIG_HOTPLUG_CPU int __cpu_disable(void) { -- cgit v1.2.2 From 8154c5d22d91cd16bd9985b0638c8957e4688d0e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 12 Aug 2010 20:18:15 +0000 Subject: powerpc: Abstract indexing of lppaca structs Currently we have the lppaca structs as a simple array of NR_CPUS entries, taking up space in the data section of the kernel image. In future we would like to allocate them dynamically, so this abstracts out the accesses to the array, making it easier to change how we locate the lppaca for a given cpu in future. Specifically, lppaca[cpu] changes to lppaca_of(cpu). Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/lparcfg.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 50362b6ef6e9..8d9e3b9cda64 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -56,7 +56,7 @@ static unsigned long get_purr(void) for_each_possible_cpu(cpu) { if (firmware_has_feature(FW_FEATURE_ISERIES)) - sum_purr += lppaca[cpu].emulated_time_base; + sum_purr += lppaca_of(cpu).emulated_time_base; else { struct cpu_usage *cu; @@ -263,7 +263,7 @@ static void parse_ppp_data(struct seq_file *m) ppp_data.active_system_procs); /* pool related entries are apropriate for shared configs */ - if (lppaca[0].shared_proc) { + if (lppaca_of(0).shared_proc) { unsigned long pool_idle_time, pool_procs; seq_printf(m, "pool=%d\n", ppp_data.pool_num); @@ -460,8 +460,8 @@ static void pseries_cmo_data(struct seq_file *m) return; for_each_possible_cpu(cpu) { - cmo_faults += lppaca[cpu].cmo_faults; - cmo_fault_time += lppaca[cpu].cmo_fault_time; + cmo_faults += lppaca_of(cpu).cmo_faults; + cmo_fault_time += lppaca_of(cpu).cmo_fault_time; } seq_printf(m, "cmo_faults=%lu\n", cmo_faults); @@ -479,8 +479,8 @@ static void splpar_dispatch_data(struct seq_file *m) unsigned long dispatch_dispersions = 0; for_each_possible_cpu(cpu) { - dispatches += lppaca[cpu].yield_count; - dispatch_dispersions += lppaca[cpu].dispersion_count; + dispatches += lppaca_of(cpu).yield_count; + dispatch_dispersions += lppaca_of(cpu).dispersion_count; } seq_printf(m, "dispatches=%lu\n", dispatches); @@ -545,7 +545,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "partition_potential_processors=%d\n", partition_potential_processors); - seq_printf(m, "shared_processor_mode=%d\n", lppaca[0].shared_proc); + seq_printf(m, "shared_processor_mode=%d\n", lppaca_of(0).shared_proc); seq_printf(m, "slb_size=%d\n", mmu_slb_size); -- cgit v1.2.2 From 93c22703efa72c7527dbd586d1951c1f4a85fd70 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 12 Aug 2010 20:18:48 +0000 Subject: powerpc: Dynamically allocate most lppaca structs This arranges for the lppaca structs for most cpus to be dynamically allocated in the same manner as the paca structs. If we don't include support for legacy iSeries, only the first lppaca is statically allocated; the rest are dynamically allocated. If we include legacy iSeries support, then we statically allocate the first 64 lppaca structs, since the iSeries hypervisor requires that the lppaca structs be present in the data section of the kernel image, but legacy iSeries supports at most 64 cpus. With CONFIG_NR_CPUS, the kernel image size for a typical pSeries config went from: text data bss dec hex filename 9524478 4734564 8469944 22728986 15ad11a ../test-1024/vmlinux to: text data bss dec hex filename 9524482 3751508 8469944 21745934 14bd10e ../test-1024/vmlinux a reduction of 983052 bytes overall. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/paca.c | 70 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d0a26f1770fe..1e068a46e6c3 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -26,6 +26,20 @@ extern unsigned long __toc_start; #ifdef CONFIG_PPC_BOOK3S +/* + * We only have to have statically allocated lppaca structs on + * legacy iSeries, which supports at most 64 cpus. + */ +#ifdef CONFIG_PPC_ISERIES +#if NR_CPUS < 64 +#define NR_LPPACAS NR_CPUS +#else +#define NR_LPPACAS 64 +#endif +#else /* not iSeries */ +#define NR_LPPACAS 1 +#endif + /* * The structure which the hypervisor knows about - this structure * should not cross a page boundary. The vpa_init/register_vpa call @@ -36,7 +50,7 @@ extern unsigned long __toc_start; * will suffice to ensure that it doesn't cross a page boundary. */ struct lppaca lppaca[] = { - [0 ... (NR_CPUS-1)] = { + [0 ... (NR_LPPACAS-1)] = { .desc = 0xd397d781, /* "LpPa" */ .size = sizeof(struct lppaca), .dyn_proc_status = 2, @@ -49,6 +63,54 @@ struct lppaca lppaca[] = { }, }; +static struct lppaca *extra_lppacas; +static long __initdata lppaca_size; + +static void allocate_lppacas(int nr_cpus, unsigned long limit) +{ + if (nr_cpus <= NR_LPPACAS) + return; + + lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) * + (nr_cpus - NR_LPPACAS)); + extra_lppacas = __va(memblock_alloc_base(lppaca_size, + PAGE_SIZE, limit)); +} + +static struct lppaca *new_lppaca(int cpu) +{ + struct lppaca *lp; + + if (cpu < NR_LPPACAS) + return &lppaca[cpu]; + + lp = extra_lppacas + (cpu - NR_LPPACAS); + *lp = lppaca[0]; + + return lp; +} + +static void free_lppacas(void) +{ + long new_size = 0, nr; + + if (!lppaca_size) + return; + nr = num_possible_cpus() - NR_LPPACAS; + if (nr > 0) + new_size = PAGE_ALIGN(nr * sizeof(struct lppaca)); + if (new_size >= lppaca_size) + return; + + memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size); + lppaca_size = new_size; +} + +#else + +static inline void allocate_lppacas(int, unsigned long) { } +static inline void free_lppacas(void) { } + #endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_STD_MMU_64 @@ -88,7 +150,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; #ifdef CONFIG_PPC_BOOK3S - new_paca->lppaca_ptr = &lppaca[cpu]; + new_paca->lppaca_ptr = new_lppaca(cpu); #else new_paca->kernel_pgd = swapper_pg_dir; #endif @@ -144,6 +206,8 @@ void __init allocate_pacas(void) printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", paca_size, nr_cpus, paca); + allocate_lppacas(nr_cpus, limit); + /* Can't use for_each_*_cpu, as they aren't functional yet */ for (cpu = 0; cpu < nr_cpus; cpu++) initialise_paca(&paca[cpu], cpu); @@ -164,4 +228,6 @@ void __init free_unused_pacas(void) paca_size - new_size); paca_size = new_size; + + free_lppacas(); } -- cgit v1.2.2 From cf9efce0ce3136fa076f53e53154e98455229514 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 26 Aug 2010 19:56:43 +0000 Subject: powerpc: Account time using timebase rather than PURR Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the PURR register for measuring the user and system time used by processes, as well as other related times such as hardirq and softirq times. This turns out to be quite confusing for users because it means that a program will often be measured as taking less time when run on a multi-threaded processor (SMT2 or SMT4 mode) than it does when run on a single-threaded processor (ST mode), even though the program takes longer to finish. The discrepancy is accounted for as stolen time, which is also confusing, particularly when there are no other partitions running. This changes the accounting to use the timebase instead, meaning that the reported user and system times are the actual number of real-time seconds that the program was executing on the processor thread, regardless of which SMT mode the processor is in. Thus a program will generally show greater user and system times when run on a multi-threaded processor than on a single-threaded processor. On pSeries systems on POWER5 or later processors, we measure the stolen time (time when this partition wasn't running) using the hypervisor dispatch trace log. We check for new entries in the log on every entry from user mode and on every transition from kernel process context to soft or hard IRQ context (i.e. when account_system_vtime() gets called). So that we can correctly distinguish time stolen from user time and time stolen from system time, without having to check the log on every exit to user mode, we store separate timestamps for exit to user mode and entry from user mode. On systems that have a SPURR (POWER6 and POWER7), we read the SPURR in account_system_vtime() (as before), and then apportion the SPURR ticks since the last time we read it between scaled user time and scaled system time according to the relative proportions of user time and system time over the same interval. This avoids having to read the SPURR on every kernel entry and exit. On systems that have PURR but not SPURR (i.e., POWER5), we do the same using the PURR rather than the SPURR. This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl for now since it conflicts with the use of the dispatch trace log by the time accounting code. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 8 +- arch/powerpc/kernel/entry_64.S | 18 +++ arch/powerpc/kernel/process.c | 1 - arch/powerpc/kernel/smp.c | 5 - arch/powerpc/kernel/time.c | 268 ++++++++++++++++++-------------------- 5 files changed, 150 insertions(+), 150 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1c0607ddccc0..c63494090854 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -181,17 +181,19 @@ int main(void) offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); DEFINE(SLBSHADOW_STACKESID, offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid)); + DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); - DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); + DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); + DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); - DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); - DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr)); + DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); + DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 4d5fa12ca6e8..d82878c4daa6 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -97,6 +97,24 @@ system_call_common: addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ +#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) +BEGIN_FW_FTR_SECTION + beq 33f + /* if from user, see if there are any DTL entries to process */ + ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */ + ld r11,PACA_DTL_RIDX(r13) /* get log read index */ + ld r10,LPPACA_DTLIDX(r10) /* get log write index */ + cmpd cr1,r11,r10 + beq+ cr1,33f + bl .accumulate_stolen_time + REST_GPR(0,r1) + REST_4GPRS(3,r1) + REST_2GPRS(7,r1) + addi r9,r1,STACK_FRAME_OVERHEAD +33: +END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) +#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ + #ifdef CONFIG_TRACE_IRQFLAGS bl .trace_hardirqs_on REST_GPR(0,r1) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 37bc8ff16cac..84906d3fc860 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -517,7 +517,6 @@ struct task_struct *__switch_to(struct task_struct *prev, account_system_vtime(current); account_process_vtime(current); - calculate_steal_time(); /* * We can't take a PMU exception inside _switch() since there is a diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9019f0f1bb5e..68034bbf2e4f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -508,9 +508,6 @@ int __devinit start_secondary(void *unused) if (smp_ops->take_timebase) smp_ops->take_timebase(); - if (system_state > SYSTEM_BOOTING) - snapshot_timebase(); - secondary_cpu_time_init(); ipi_call_lock(); @@ -575,8 +572,6 @@ void __init smp_cpus_done(unsigned int max_cpus) free_cpumask_var(old_mask); - snapshot_timebases(); - dump_numa_cpu_topology(); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 8533b3b83f5d..fca20643c368 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -164,8 +164,6 @@ unsigned long ppc_proc_freq; EXPORT_SYMBOL(ppc_proc_freq); unsigned long ppc_tb_freq; -static DEFINE_PER_CPU(u64, last_jiffy); - #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Factors for converting from cputime_t (timebase ticks) to @@ -200,62 +198,151 @@ static void calc_cputime_factors(void) } /* - * Read the PURR on systems that have it, otherwise the timebase. + * Read the SPURR on systems that have it, otherwise the PURR, + * or if that doesn't exist return the timebase value passed in. */ -static u64 read_purr(void) +static u64 read_spurr(u64 tb) { + if (cpu_has_feature(CPU_FTR_SPURR)) + return mfspr(SPRN_SPURR); if (cpu_has_feature(CPU_FTR_PURR)) return mfspr(SPRN_PURR); - return mftb(); + return tb; } +#ifdef CONFIG_PPC_SPLPAR + /* - * Read the SPURR on systems that have it, otherwise the purr + * Scan the dispatch trace log and count up the stolen time. + * Should be called with interrupts disabled. */ -static u64 read_spurr(u64 purr) +static u64 scan_dispatch_log(u64 stop_tb) { - /* - * cpus without PURR won't have a SPURR - * We already know the former when we use this, so tell gcc - */ - if (cpu_has_feature(CPU_FTR_PURR) && cpu_has_feature(CPU_FTR_SPURR)) - return mfspr(SPRN_SPURR); - return purr; + unsigned long i = local_paca->dtl_ridx; + struct dtl_entry *dtl = local_paca->dtl_curr; + struct dtl_entry *dtl_end = local_paca->dispatch_log_end; + struct lppaca *vpa = local_paca->lppaca_ptr; + u64 tb_delta; + u64 stolen = 0; + u64 dtb; + + if (i == vpa->dtl_idx) + return 0; + while (i < vpa->dtl_idx) { + dtb = dtl->timebase; + tb_delta = dtl->enqueue_to_dispatch_time + + dtl->ready_to_enqueue_time; + barrier(); + if (i + N_DISPATCH_LOG < vpa->dtl_idx) { + /* buffer has overflowed */ + i = vpa->dtl_idx - N_DISPATCH_LOG; + dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); + continue; + } + if (dtb > stop_tb) + break; + stolen += tb_delta; + ++i; + ++dtl; + if (dtl == dtl_end) + dtl = local_paca->dispatch_log; + } + local_paca->dtl_ridx = i; + local_paca->dtl_curr = dtl; + return stolen; } +/* + * Accumulate stolen time by scanning the dispatch trace log. + * Called on entry from user mode. + */ +void accumulate_stolen_time(void) +{ + u64 sst, ust; + + sst = scan_dispatch_log(get_paca()->starttime_user); + ust = scan_dispatch_log(get_paca()->starttime); + get_paca()->system_time -= sst; + get_paca()->user_time -= ust; + get_paca()->stolen_time += ust + sst; +} + +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + u64 stolen = 0; + + if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) { + stolen = scan_dispatch_log(stop_tb); + get_paca()->system_time -= stolen; + } + + stolen += get_paca()->stolen_time; + get_paca()->stolen_time = 0; + return stolen; +} + +#else /* CONFIG_PPC_SPLPAR */ +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + return 0; +} + +#endif /* CONFIG_PPC_SPLPAR */ + /* * Account time for a transition between system, hard irq * or soft irq state. */ void account_system_vtime(struct task_struct *tsk) { - u64 now, nowscaled, delta, deltascaled, sys_time; + u64 now, nowscaled, delta, deltascaled; unsigned long flags; + u64 stolen, udelta, sys_scaled, user_scaled; local_irq_save(flags); - now = read_purr(); + now = mftb(); nowscaled = read_spurr(now); - delta = now - get_paca()->startpurr; + get_paca()->system_time += now - get_paca()->starttime; + get_paca()->starttime = now; deltascaled = nowscaled - get_paca()->startspurr; - get_paca()->startpurr = now; get_paca()->startspurr = nowscaled; - if (!in_interrupt()) { - /* deltascaled includes both user and system time. - * Hence scale it based on the purr ratio to estimate - * the system time */ - sys_time = get_paca()->system_time; - if (get_paca()->user_time) - deltascaled = deltascaled * sys_time / - (sys_time + get_paca()->user_time); - delta += sys_time; - get_paca()->system_time = 0; + + stolen = calculate_stolen_time(now); + + delta = get_paca()->system_time; + get_paca()->system_time = 0; + udelta = get_paca()->user_time - get_paca()->utime_sspurr; + get_paca()->utime_sspurr = get_paca()->user_time; + + /* + * Because we don't read the SPURR on every kernel entry/exit, + * deltascaled includes both user and system SPURR ticks. + * Apportion these ticks to system SPURR ticks and user + * SPURR ticks in the same ratio as the system time (delta) + * and user time (udelta) values obtained from the timebase + * over the same interval. The system ticks get accounted here; + * the user ticks get saved up in paca->user_time_scaled to be + * used by account_process_tick. + */ + sys_scaled = delta; + user_scaled = udelta; + if (deltascaled != delta + udelta) { + if (udelta) { + sys_scaled = deltascaled * delta / (delta + udelta); + user_scaled = deltascaled - sys_scaled; + } else { + sys_scaled = deltascaled; + } + } + get_paca()->user_time_scaled += user_scaled; + + if (in_irq() || idle_task(smp_processor_id()) != tsk) { + account_system_time(tsk, 0, delta, sys_scaled); + if (stolen) + account_steal_time(stolen); + } else { + account_idle_time(delta + stolen); } - if (in_irq() || idle_task(smp_processor_id()) != tsk) - account_system_time(tsk, 0, delta, deltascaled); - else - account_idle_time(delta); - __get_cpu_var(cputime_last_delta) = delta; - __get_cpu_var(cputime_scaled_last_delta) = deltascaled; local_irq_restore(flags); } EXPORT_SYMBOL_GPL(account_system_vtime); @@ -265,125 +352,26 @@ EXPORT_SYMBOL_GPL(account_system_vtime); * by the exception entry and exit code to the generic process * user and system time records. * Must be called with interrupts disabled. + * Assumes that account_system_vtime() has been called recently + * (i.e. since the last entry from usermode) so that + * get_paca()->user_time_scaled is up to date. */ void account_process_tick(struct task_struct *tsk, int user_tick) { cputime_t utime, utimescaled; utime = get_paca()->user_time; + utimescaled = get_paca()->user_time_scaled; get_paca()->user_time = 0; - utimescaled = cputime_to_scaled(utime); + get_paca()->user_time_scaled = 0; + get_paca()->utime_sspurr = 0; account_user_time(tsk, utime, utimescaled); } -/* - * Stuff for accounting stolen time. - */ -struct cpu_purr_data { - int initialized; /* thread is running */ - u64 tb; /* last TB value read */ - u64 purr; /* last PURR value read */ - u64 spurr; /* last SPURR value read */ -}; - -/* - * Each entry in the cpu_purr_data array is manipulated only by its - * "owner" cpu -- usually in the timer interrupt but also occasionally - * in process context for cpu online. As long as cpus do not touch - * each others' cpu_purr_data, disabling local interrupts is - * sufficient to serialize accesses. - */ -static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data); - -static void snapshot_tb_and_purr(void *data) -{ - unsigned long flags; - struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data); - - local_irq_save(flags); - p->tb = get_tb_or_rtc(); - p->purr = mfspr(SPRN_PURR); - wmb(); - p->initialized = 1; - local_irq_restore(flags); -} - -/* - * Called during boot when all cpus have come up. - */ -void snapshot_timebases(void) -{ - if (!cpu_has_feature(CPU_FTR_PURR)) - return; - on_each_cpu(snapshot_tb_and_purr, NULL, 1); -} - -/* - * Must be called with interrupts disabled. - */ -void calculate_steal_time(void) -{ - u64 tb, purr; - s64 stolen; - struct cpu_purr_data *pme; - - pme = &__get_cpu_var(cpu_purr_data); - if (!pme->initialized) - return; /* !CPU_FTR_PURR or early in early boot */ - tb = mftb(); - purr = mfspr(SPRN_PURR); - stolen = (tb - pme->tb) - (purr - pme->purr); - if (stolen > 0) { - if (idle_task(smp_processor_id()) != current) - account_steal_time(stolen); - else - account_idle_time(stolen); - } - pme->tb = tb; - pme->purr = purr; -} - -#ifdef CONFIG_PPC_SPLPAR -/* - * Must be called before the cpu is added to the online map when - * a cpu is being brought up at runtime. - */ -static void snapshot_purr(void) -{ - struct cpu_purr_data *pme; - unsigned long flags; - - if (!cpu_has_feature(CPU_FTR_PURR)) - return; - local_irq_save(flags); - pme = &__get_cpu_var(cpu_purr_data); - pme->tb = mftb(); - pme->purr = mfspr(SPRN_PURR); - pme->initialized = 1; - local_irq_restore(flags); -} - -#endif /* CONFIG_PPC_SPLPAR */ - #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ #define calc_cputime_factors() -#define calculate_steal_time() do { } while (0) #endif -#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)) -#define snapshot_purr() do { } while (0) -#endif - -/* - * Called when a cpu comes up after the system has finished booting, - * i.e. as a result of a hotplug cpu action. - */ -void snapshot_timebase(void) -{ - __get_cpu_var(last_jiffy) = get_tb_or_rtc(); - snapshot_purr(); -} - void __delay(unsigned long loops) { unsigned long start; @@ -585,8 +573,6 @@ void timer_interrupt(struct pt_regs * regs) old_regs = set_irq_regs(regs); irq_enter(); - calculate_steal_time(); - if (test_perf_event_pending()) { clear_perf_event_pending(); perf_event_do_pending(); -- cgit v1.2.2 From 872e439a45ed4a4bd499bc55cb0dffa74027f749 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 31 Aug 2010 01:59:53 +0000 Subject: powerpc/pseries: Re-enable dispatch trace log userspace interface Since the cpu accounting code uses the hypervisor dispatch trace log now when CONFIG_VIRT_CPU_ACCOUNTING = y, the previous commit disabled access to it via files in the /sys/kernel/debug/powerpc/dtl/ directory in that case. This restores those files. To do this, we now have a hook that the cpu accounting code will call as it processes each entry from the hypervisor dispatch trace log. The code in dtl.c now uses that to fill up its ring buffer, rather than having the hypervisor fill the ring buffer directly. This also fixes dtl_file_read() to handle overflow conditions a bit better and adds a spinlock to ensure that race conditions (multiple processes opening or reading the file concurrently) are handled correctly. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fca20643c368..bcb738b9ff8c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -183,6 +183,8 @@ DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); cputime_t cputime_one_jiffy; +void (*dtl_consumer)(struct dtl_entry *, u64); + static void calc_cputime_factors(void) { struct div_result res; @@ -218,7 +220,7 @@ static u64 read_spurr(u64 tb) */ static u64 scan_dispatch_log(u64 stop_tb) { - unsigned long i = local_paca->dtl_ridx; + u64 i = local_paca->dtl_ridx; struct dtl_entry *dtl = local_paca->dtl_curr; struct dtl_entry *dtl_end = local_paca->dispatch_log_end; struct lppaca *vpa = local_paca->lppaca_ptr; @@ -229,6 +231,8 @@ static u64 scan_dispatch_log(u64 stop_tb) if (i == vpa->dtl_idx) return 0; while (i < vpa->dtl_idx) { + if (dtl_consumer) + dtl_consumer(dtl, i); dtb = dtl->timebase; tb_delta = dtl->enqueue_to_dispatch_time + dtl->ready_to_enqueue_time; -- cgit v1.2.2 From 05d77ac90c0d260ae18decd70507dc4f5b71a2cb Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sat, 21 Aug 2010 11:43:20 +0000 Subject: powerpc: Remove fpscr use from [kvm_]cvt_{fd,df} Neither lfs nor stfs touch the fpscr, so remove the restore/save of it around them. Signed-off-by: Andreas Schwab Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/align.c | 4 ++-- arch/powerpc/kernel/fpu.S | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index b876e989220b..8184ee97e484 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -889,7 +889,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_df(&data.dd, (float *)&data.v[4], ¤t->thread); + cvt_df(&data.dd, (float *)&data.v[4]); preempt_enable(); #else return 0; @@ -933,7 +933,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_fd((float *)&data.v[4], &data.dd, ¤t->thread); + cvt_fd((float *)&data.v[4], &data.dd); preempt_enable(); #else return 0; diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index fc8f5b14019c..e86c040ae585 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -163,24 +163,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* * These are used in the alignment trap handler when emulating * single-precision loads and stores. - * We restore and save the fpscr so the task gets the same result - * and exceptions as if the cpu had performed the load or store. */ _GLOBAL(cvt_fd) - lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */ - MTFSF_L(0) lfs 0,0(r3) stfd 0,0(r4) - mffs 0 - stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */ blr _GLOBAL(cvt_df) - lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */ - MTFSF_L(0) lfd 0,0(r3) stfs 0,0(r4) - mffs 0 - stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */ blr -- cgit v1.2.2 From cab175f9fa2973f0deb1580fca3c966fe1d3981e Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Fri, 27 Aug 2010 03:49:11 +0000 Subject: powerpc: Use is_32bit_task() helper to test 32-bit binary This patch removes all explicit tests for the TIF_32BIT flag Signed-off-by: Denis Kirjanov Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ptrace.c | 2 +- arch/powerpc/kernel/vdso.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 11f3cd9c832f..286d9783d93f 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1681,7 +1681,7 @@ long do_syscall_trace_enter(struct pt_regs *regs) if (unlikely(current->audit_context)) { #ifdef CONFIG_PPC64 - if (!test_thread_flag(TIF_32BIT)) + if (!is_32bit_task()) audit_syscall_entry(AUDIT_ARCH_PPC64, regs->gpr[0], regs->gpr[3], regs->gpr[4], diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 13002fe206e7..fd8728729abc 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -159,7 +159,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma) { int i; - if (!vma || test_thread_flag(TIF_32BIT)) { + if (!vma || is_32bit_task()) { printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); for (i=0; i Date: Mon, 30 Aug 2010 19:23:52 +0000 Subject: powerpc/dma: Add optional platform override of dma_set_mask() Some platforms may want to override dma_set_mask() to take into account some specific "features" such as the availability of a direct-map window in addition to an iommu. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 84d6367ec003..f368c075c90b 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -12,6 +12,7 @@ #include #include #include +#include /* * Generic direct DMA implementation @@ -154,6 +155,23 @@ EXPORT_SYMBOL(dma_direct_ops); #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) +int dma_set_mask(struct device *dev, u64 dma_mask) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + if (ppc_md.dma_set_mask) + return ppc_md.dma_set_mask(dev, dma_mask); + if (unlikely(dma_ops == NULL)) + return -EIO; + if (dma_ops->set_dma_mask != NULL) + return dma_ops->set_dma_mask(dev, dma_mask); + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + *dev->dma_mask = dma_mask; + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + static int __init dma_init(void) { dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); -- cgit v1.2.2 From 51b0fe39549a04858001922919ab355dee9bdfcf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:57 +0200 Subject: perf: Deconstify struct pmu sed -ie 's/const struct pmu\>/struct pmu/g' `git grep -l "const struct pmu\>"` Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 8 ++++---- arch/powerpc/kernel/perf_event_fsl_emb.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index d301a30445e0..5f78681ad902 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -857,7 +857,7 @@ static void power_pmu_unthrottle(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(const struct pmu *pmu) +void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -870,7 +870,7 @@ void power_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(const struct pmu *pmu) +void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -882,7 +882,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(const struct pmu *pmu) +int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -1014,7 +1014,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { u64 ev; unsigned long flags; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 1ba45471ae43..d7619b5e7a6e 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -428,7 +428,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; -- cgit v1.2.2 From b0a873ebbf87bf38bf70b5e39a7cadc96099fa13 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:08 +0200 Subject: perf: Register PMU implementations Simple registration interface for struct pmu, this provides the infrastructure for removing all the weak functions. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 46 +++++++++++++++++--------------- arch/powerpc/kernel/perf_event_fsl_emb.c | 37 ++++++++++++------------- 2 files changed, 43 insertions(+), 40 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 5f78681ad902..19131b2614b9 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -904,16 +904,6 @@ int power_pmu_commit_txn(struct pmu *pmu) return 0; } -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, - .start_txn = power_pmu_start_txn, - .cancel_txn = power_pmu_cancel_txn, - .commit_txn = power_pmu_commit_txn, -}; - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. @@ -1014,7 +1004,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int power_pmu_event_init(struct perf_event *event) { u64 ev; unsigned long flags; @@ -1026,25 +1016,27 @@ struct pmu *hw_perf_event_init(struct perf_event *event) struct cpu_hw_events *cpuhw; if (!ppmu) - return ERR_PTR(-ENXIO); + return -ENOENT; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: ev = event->attr.config; break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } + event->hw.config_base = ev; event->hw.idx = 0; @@ -1081,7 +1073,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) */ ev = normal_pmc_alternative(ev, flags); if (!ev) - return ERR_PTR(-EINVAL); + return -EINVAL; } } @@ -1095,19 +1087,19 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, ctrs, events, cflags); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } events[n] = ev; ctrs[n] = event; cflags[n] = flags; if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); + return -EINVAL; cpuhw = &get_cpu_var(cpu_hw_events); err = power_check_constraints(cpuhw, events, cflags, n + 1); put_cpu_var(cpu_hw_events); if (err) - return ERR_PTR(-EINVAL); + return -EINVAL; event->hw.config = events[n]; event->hw.event_base = cflags[n]; @@ -1132,11 +1124,20 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &power_pmu; + return err; } +struct pmu power_pmu = { + .event_init = power_pmu_event_init, + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, + .unthrottle = power_pmu_unthrottle, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -1342,6 +1343,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_pmu_register(&power_pmu); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index d7619b5e7a6e..ea6a804e43fd 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -378,13 +378,6 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) local_irq_restore(flags); } -static struct pmu fsl_emb_pmu = { - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, - .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, -}; - /* * Release the PMU if this is the last perf_event. */ @@ -428,7 +421,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int fsl_emb_pmu_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; @@ -441,14 +434,14 @@ struct pmu *hw_perf_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: @@ -456,12 +449,12 @@ struct pmu *hw_perf_event_init(struct perf_event *event) break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } event->hw.config = ppmu->xlate_event(ev); if (!(event->hw.config & FSL_EMB_EVENT_VALID)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* * If this is in a group, check if it can go on with all the @@ -473,7 +466,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, events); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { @@ -484,7 +477,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } if (num_restricted >= ppmu->n_restricted) - return ERR_PTR(-EINVAL); + return -EINVAL; } event->hw.idx = -1; @@ -497,7 +490,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) if (event->attr.exclude_kernel) event->hw.config_base |= PMLCA_FCS; if (event->attr.exclude_idle) - return ERR_PTR(-ENOTSUPP); + return -ENOTSUPP; event->hw.last_period = event->hw.sample_period; local64_set(&event->hw.period_left, event->hw.last_period); @@ -523,11 +516,17 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &fsl_emb_pmu; + return err; } +static struct pmu fsl_emb_pmu = { + .event_init = fsl_emb_pmu_event_init, + .enable = fsl_emb_pmu_enable, + .disable = fsl_emb_pmu_disable, + .read = fsl_emb_pmu_read, + .unthrottle = fsl_emb_pmu_unthrottle, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -651,5 +650,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); + perf_pmu_register(&fsl_emb_pmu); + return 0; } -- cgit v1.2.2 From 24cd7f54a0d47e1d5b3de29e2456bfbd2d8447b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 17:32:03 +0200 Subject: perf: Reduce perf_disable() usage Since the current perf_disable() usage is only an optimization, remove it for now. This eases the removal of the __weak hw_perf_enable() interface. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 3 +++ arch/powerpc/kernel/perf_event_fsl_emb.c | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 19131b2614b9..c1408821dbc2 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -861,6 +861,7 @@ void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_disable(); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -875,6 +876,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); } /* @@ -901,6 +903,7 @@ int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); return 0; } diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index ea6a804e43fd..9bc84a7fd901 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -262,7 +262,7 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* perf must be disabled, context locked on entry */ +/* context locked on entry */ static int fsl_emb_pmu_enable(struct perf_event *event) { struct cpu_hw_events *cpuhw; @@ -271,6 +271,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; + perf_disable(); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -310,15 +311,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); + perf_enable(); return ret; } -/* perf must be disabled, context locked on entry */ +/* context locked on entry */ static void fsl_emb_pmu_disable(struct perf_event *event) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; + perf_disable(); if (i < 0) goto out; @@ -346,6 +349,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: + perf_enable(); put_cpu_var(cpu_hw_events); } -- cgit v1.2.2 From 33696fc0d141bbbcb12f75b69608ea83282e3117 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Jun 2010 08:49:00 +0200 Subject: perf: Per PMU disable Changes perf_disable() into perf_pmu_disable(). Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 24 +++++++++++++----------- arch/powerpc/kernel/perf_event_fsl_emb.c | 18 ++++++++++-------- 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index c1408821dbc2..deb84bbcb0e6 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -517,7 +517,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void power_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +565,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void power_pmu_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -735,7 +735,7 @@ static int power_pmu_enable(struct perf_event *event) int ret = -EAGAIN; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); /* * Add the event to the list (if there is room) @@ -769,7 +769,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -784,7 +784,7 @@ static void power_pmu_disable(struct perf_event *event) unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); @@ -821,7 +821,7 @@ static void power_pmu_disable(struct perf_event *event) cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -837,7 +837,7 @@ static void power_pmu_unthrottle(struct perf_event *event) if (!event->hw.idx || !event->hw.sample_period) return; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); left = event->hw.sample_period; event->hw.last_period = left; @@ -848,7 +848,7 @@ static void power_pmu_unthrottle(struct perf_event *event) local64_set(&event->hw.prev_count, val); local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -861,7 +861,7 @@ void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - perf_disable(); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -876,7 +876,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); } /* @@ -903,7 +903,7 @@ int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); return 0; } @@ -1131,6 +1131,8 @@ static int power_pmu_event_init(struct perf_event *event) } struct pmu power_pmu = { + .pmu_enable = power_pmu_pmu_enable, + .pmu_disable = power_pmu_pmu_disable, .event_init = power_pmu_event_init, .enable = power_pmu_enable, .disable = power_pmu_disable, diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 9bc84a7fd901..84b1974c628f 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -177,7 +177,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +216,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void fsl_emb_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -271,7 +271,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; - perf_disable(); + perf_pmu_disable(event->pmu); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -311,7 +311,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -321,7 +321,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) struct cpu_hw_events *cpuhw; int i = event->hw.idx; - perf_disable(); + perf_pmu_disable(event->pmu); if (i < 0) goto out; @@ -349,7 +349,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: - perf_enable(); + perf_pmu_enable(event->pmu); put_cpu_var(cpu_hw_events); } @@ -367,7 +367,7 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) if (event->hw.idx < 0 || !event->hw.sample_period) return; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); fsl_emb_pmu_read(event); left = event->hw.sample_period; event->hw.last_period = left; @@ -378,7 +378,7 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) local64_set(&event->hw.prev_count, val); local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -524,6 +524,8 @@ static int fsl_emb_pmu_event_init(struct perf_event *event) } static struct pmu fsl_emb_pmu = { + .pmu_enable = fsl_emb_pmu_pmu_enable, + .pmu_disable = fsl_emb_pmu_pmu_disable, .event_init = fsl_emb_pmu_event_init, .enable = fsl_emb_pmu_enable, .disable = fsl_emb_pmu_disable, -- cgit v1.2.2 From a4eaf7f14675cb512d69f0c928055e73d0c6d252 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Jun 2010 14:37:10 +0200 Subject: perf: Rework the PMU methods Replace pmu::{enable,disable,start,stop,unthrottle} with pmu::{add,del,start,stop}, all of which take a flags argument. The new interface extends the capability to stop a counter while keeping it scheduled on the PMU. We replace the throttled state with the generic stopped state. This also allows us to efficiently stop/start counters over certain code paths (like IRQ handlers). It also allows scheduling a counter without it starting, allowing for a generic frozen state (useful for rotating stopped counters). The stopped state is implemented in two different ways, depending on how the architecture implemented the throttled state: 1) We disable the counter: a) the pmu has per-counter enable bits, we flip that b) we program a NOP event, preserving the counter state 2) We store the counter state and ignore all read/overflow events Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 105 +++++++++++++++++++----------- arch/powerpc/kernel/perf_event_fsl_emb.c | 107 +++++++++++++++++++------------ 2 files changed, 134 insertions(+), 78 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index deb84bbcb0e6..9cb4924b6c07 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + if (!event->hw.idx) return; /* @@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -static void power_pmu_pmu_disable(struct pmu *pmu) +static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +568,7 @@ static void power_pmu_pmu_disable(struct pmu *pmu) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -static void power_pmu_pmu_enable(struct pmu *pmu) +static void power_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -672,6 +675,8 @@ static void power_pmu_pmu_enable(struct pmu *pmu) } local64_set(&event->hw.prev_count, val); event->hw.idx = idx; + if (event->hw.state & PERF_HES_STOPPED) + val = 0; write_pmc(idx, val); perf_event_update_userpage(event); } @@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count, * re-enable the PMU in order to get hw_perf_enable to do the * actual work of reconfiguring the PMU. */ -static int power_pmu_enable(struct perf_event *event) +static int power_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + if (!(ef_flags & PERF_EF_START)) + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -777,7 +785,7 @@ nocheck: /* * Remove a event from the PMU. */ -static void power_pmu_disable(struct perf_event *event) +static void power_pmu_del(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; long i; @@ -826,27 +834,53 @@ static void power_pmu_disable(struct perf_event *event) } /* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. + * POWER-PMU does not support disabling individual counters, hence + * program their cycle counter to their max value and ignore the interrupts. */ -static void power_pmu_unthrottle(struct perf_event *event) + +static void power_pmu_start(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; + s64 left; if (!event->hw.idx || !event->hw.sample_period) return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void power_pmu_stop(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + + if (!event->hw.idx || !event->hw.sample_period) + return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); perf_pmu_disable(event->pmu); + power_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); perf_pmu_enable(event->pmu); local_irq_restore(flags); @@ -1131,13 +1165,14 @@ static int power_pmu_event_init(struct perf_event *event) } struct pmu power_pmu = { - .pmu_enable = power_pmu_pmu_enable, - .pmu_disable = power_pmu_pmu_disable, + .pmu_enable = power_pmu_enable, + .pmu_disable = power_pmu_disable, .event_init = power_pmu_event_init, - .enable = power_pmu_enable, - .disable = power_pmu_disable, + .add = power_pmu_add, + .del = power_pmu_del, + .start = power_pmu_start, + .stop = power_pmu_stop, .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, .start_txn = power_pmu_start_txn, .cancel_txn = power_pmu_cancel_txn, .commit_txn = power_pmu_commit_txn, @@ -1155,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -1177,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -1189,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + power_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } /* diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 84b1974c628f..7ecca59ddf77 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + /* * Performance monitor interrupts come even when interrupts * are soft-disabled, as long as interrupts are hard-enabled. @@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) +static void fsl_emb_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +219,7 @@ static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -static void fsl_emb_pmu_pmu_enable(struct pmu *pmu) +static void fsl_emb_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -263,7 +266,7 @@ static int collect_events(struct perf_event *group, int max_count, } /* context locked on entry */ -static int fsl_emb_pmu_enable(struct perf_event *event) +static int fsl_emb_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int ret = -EAGAIN; @@ -302,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event) val = 0x80000000L - left; } local64_set(&event->hw.prev_count, val); + + if (!(flags & PERF_EF_START)) { + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + val = 0; + } + write_pmc(i, val); perf_event_update_userpage(event); @@ -316,7 +325,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) } /* context locked on entry */ -static void fsl_emb_pmu_disable(struct perf_event *event) +static void fsl_emb_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; @@ -353,30 +362,49 @@ static void fsl_emb_pmu_disable(struct perf_event *event) put_cpu_var(cpu_hw_events); } -/* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. - * - * Context is locked on entry, but perf is not disabled. - */ -static void fsl_emb_pmu_unthrottle(struct perf_event *event) +static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + s64 left; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; if (event->hw.idx < 0 || !event->hw.sample_period) return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); perf_pmu_disable(event->pmu); + fsl_emb_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); perf_pmu_enable(event->pmu); local_irq_restore(flags); @@ -524,13 +552,14 @@ static int fsl_emb_pmu_event_init(struct perf_event *event) } static struct pmu fsl_emb_pmu = { - .pmu_enable = fsl_emb_pmu_pmu_enable, - .pmu_disable = fsl_emb_pmu_pmu_disable, + .pmu_enable = fsl_emb_pmu_enable, + .pmu_disable = fsl_emb_pmu_disable, .event_init = fsl_emb_pmu_event_init, - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, + .add = fsl_emb_pmu_add, + .del = fsl_emb_pmu_del, + .start = fsl_emb_pmu_start, + .stop = fsl_emb_pmu_stop, .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, }; /* @@ -545,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -567,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -576,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_sample_data_init(&data, 0); data.period = event->hw.last_period; - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + fsl_emb_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } static void perf_event_interrupt(struct pt_regs *regs) -- cgit v1.2.2 From 9f5f9ffe50e90ed73040d2100db8bfc341cee352 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 9 Sep 2010 19:02:40 +0000 Subject: powerpc/perf: Fix sampling enable for PPC970 The logic to distinguish marked instruction events from ordinary events on PPC970 and derivatives was flawed. The result is that instruction sampling didn't get enabled in the PMU for some marked instruction events, so they would never trigger. This fixes it by adding the appropriate break statements in the switch statement. Reported-by: David Binderman Cc: stable@kernel.org Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ppc970-pmu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 8eff48e20dba..3fee685de4df 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -169,9 +169,11 @@ static int p970_marked_instr_event(u64 event) switch (unit) { case PM_VPU: mask = 0x4c; /* byte 0 bits 2,3,6 */ + break; case PM_LSU0: /* byte 2 bits 0,2,3,4,6; all of byte 1 */ mask = 0x085dff00; + break; case PM_LSU1L: mask = 0x50 << 24; /* byte 3 bits 4,6 */ break; -- cgit v1.2.2 From df9ee29270c11dba7d0fe0b83ce47a4d8e8d2101 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 7 Oct 2010 14:08:55 +0100 Subject: Fix IRQ flag handling naming Fix the IRQ flag handling naming. In linux/irqflags.h under one configuration, it maps: local_irq_enable() -> raw_local_irq_enable() local_irq_disable() -> raw_local_irq_disable() local_irq_save() -> raw_local_irq_save() ... and under the other configuration, it maps: raw_local_irq_enable() -> local_irq_enable() raw_local_irq_disable() -> local_irq_disable() raw_local_irq_save() -> local_irq_save() ... This is quite confusing. There should be one set of names expected of the arch, and this should be wrapped to give another set of names that are expected by users of this facility. Change this to have the arch provide: flags = arch_local_save_flags() flags = arch_local_irq_save() arch_local_irq_restore(flags) arch_local_irq_disable() arch_local_irq_enable() arch_irqs_disabled_flags(flags) arch_irqs_disabled() arch_safe_halt() Then linux/irqflags.h wraps these to provide: raw_local_save_flags(flags) raw_local_irq_save(flags) raw_local_irq_restore(flags) raw_local_irq_disable() raw_local_irq_enable() raw_irqs_disabled_flags(flags) raw_irqs_disabled() raw_safe_halt() with type checking on the flags 'arguments', and then wraps those to provide: local_save_flags(flags) local_irq_save(flags) local_irq_restore(flags) local_irq_disable() local_irq_enable() irqs_disabled_flags(flags) irqs_disabled() safe_halt() with tracing included if enabled. The arch functions can now all be inline functions rather than some of them having to be macros. Signed-off-by: David Howells [X86, FRV, MN10300] Signed-off-by: Chris Metcalf [Tile] Signed-off-by: Michal Simek [Microblaze] Tested-by: Catalin Marinas [ARM] Acked-by: Thomas Gleixner Acked-by: Haavard Skinnemoen [AVR] Acked-by: Tony Luck [IA-64] Acked-by: Hirokazu Takata [M32R] Acked-by: Greg Ungerer [M68K/M68KNOMMU] Acked-by: Ralf Baechle [MIPS] Acked-by: Kyle McMartin [PA-RISC] Acked-by: Paul Mackerras [PowerPC] Acked-by: Martin Schwidefsky [S390] Acked-by: Chen Liqin [Score] Acked-by: Matt Fleming [SH] Acked-by: David S. Miller [Sparc] Acked-by: Chris Zankel [Xtensa] Reviewed-by: Richard Henderson [Alpha] Reviewed-by: Yoshinori Sato [H8300] Cc: starvik@axis.com [CRIS] Cc: jesper.nilsson@axis.com [CRIS] Cc: linux-cris-kernel@axis.com --- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- arch/powerpc/kernel/irq.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f53029a01554..39b0c48872d2 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -818,12 +818,12 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) /* * hash_page couldn't handle it, set soft interrupt enable back - * to what it was before the trap. Note that .raw_local_irq_restore + * to what it was before the trap. Note that .arch_local_irq_restore * handles any interrupts pending at this point. */ ld r3,SOFTE(r1) TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f) - bl .raw_local_irq_restore + bl .arch_local_irq_restore b 11f /* We have a data breakpoint exception - handle it */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4a65386995d7..1903290f5469 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -116,7 +116,7 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -notrace void raw_local_irq_restore(unsigned long en) +notrace void arch_local_irq_restore(unsigned long en) { /* * get_paca()->soft_enabled = en; @@ -192,7 +192,7 @@ notrace void raw_local_irq_restore(unsigned long en) __hard_irq_enable(); } -EXPORT_SYMBOL(raw_local_irq_restore); +EXPORT_SYMBOL(arch_local_irq_restore); #endif /* CONFIG_PPC64 */ static int show_other_interrupts(struct seq_file *p, int prec) -- cgit v1.2.2 From f14362d1fe81cece6f1d78483e5bbfcf8cc497bf Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Fri, 1 Oct 2010 17:06:07 +1000 Subject: powerpc, of_serial: Endianness issues setting up the serial ports The speed and clock of the serial ports is retrieved from the device tree in both the PowerPC legacy serial code and the Open Firmware serial driver, therefore they need to handle the fact that the device tree is always big endian, while the CPU may not be. Also fix other device tree references in the legacy serial code. Signed-off-by: Ian Munsie Signed-off-by: Grant Likely --- arch/powerpc/kernel/legacy_serial.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index c1fd0f9658fd..c834757bebc0 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -52,14 +52,14 @@ static int __init add_legacy_port(struct device_node *np, int want_index, phys_addr_t taddr, unsigned long irq, upf_t flags, int irq_check_parent) { - const u32 *clk, *spd; + const __be32 *clk, *spd; u32 clock = BASE_BAUD * 16; int index; /* get clock freq. if present */ clk = of_get_property(np, "clock-frequency", NULL); if (clk && *clk) - clock = *clk; + clock = be32_to_cpup(clk); /* get default speed if present */ spd = of_get_property(np, "current-speed", NULL); @@ -109,7 +109,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index, legacy_serial_infos[index].taddr = taddr; legacy_serial_infos[index].np = of_node_get(np); legacy_serial_infos[index].clock = clock; - legacy_serial_infos[index].speed = spd ? *spd : 0; + legacy_serial_infos[index].speed = spd ? be32_to_cpup(spd) : 0; legacy_serial_infos[index].irq_check_parent = irq_check_parent; printk(KERN_DEBUG "Found legacy serial port %d for %s\n", @@ -168,7 +168,7 @@ static int __init add_legacy_soc_port(struct device_node *np, static int __init add_legacy_isa_port(struct device_node *np, struct device_node *isa_brg) { - const u32 *reg; + const __be32 *reg; const char *typep; int index = -1; u64 taddr; @@ -181,7 +181,7 @@ static int __init add_legacy_isa_port(struct device_node *np, return -1; /* Verify it's an IO port, we don't support anything else */ - if (!(reg[0] & 0x00000001)) + if (!(be32_to_cpu(reg[0]) & 0x00000001)) return -1; /* Now look for an "ibm,aix-loc" property that gives us ordering @@ -202,7 +202,7 @@ static int __init add_legacy_isa_port(struct device_node *np, taddr = 0; /* Add port, irq will be dealt with later */ - return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr, + return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), taddr, NO_IRQ, UPF_BOOT_AUTOCONF, 0); } @@ -251,9 +251,9 @@ static int __init add_legacy_pci_port(struct device_node *np, * we get to their "reg" property */ if (np != pci_dev) { - const u32 *reg = of_get_property(np, "reg", NULL); - if (reg && (*reg < 4)) - index = lindex = *reg; + const __be32 *reg = of_get_property(np, "reg", NULL); + if (reg && (be32_to_cpup(reg) < 4)) + index = lindex = be32_to_cpup(reg); } /* Local index means it's the Nth port in the PCI chip. Unfortunately @@ -507,7 +507,7 @@ static int __init check_legacy_serial_console(void) struct device_node *prom_stdout = NULL; int i, speed = 0, offset = 0; const char *name; - const u32 *spd; + const __be32 *spd; DBG(" -> check_legacy_serial_console()\n"); @@ -547,7 +547,7 @@ static int __init check_legacy_serial_console(void) } spd = of_get_property(prom_stdout, "current-speed", NULL); if (spd) - speed = *spd; + speed = be32_to_cpup(spd); if (strcmp(name, "serial") != 0) goto not_found; -- cgit v1.2.2 From d8862be1229534aac1768b8ac663e8fb2bb6ddf6 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 10 Sep 2010 09:41:35 +0000 Subject: powerpc/pseries: Export rtas_ibm_suspend_me() Export the rtas_ibm_suspend_me() routine. This is needed to perform partition migration in the kernel. Signed-off-by: Nathan Fontenot Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtas.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 41048de3c6c3..dc67ea46465e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -805,7 +805,7 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } -static int rtas_ibm_suspend_me(struct rtas_args *args) +int rtas_ibm_suspend_me(struct rtas_args *args) { long state; long rc; @@ -855,7 +855,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args) return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ -static int rtas_ibm_suspend_me(struct rtas_args *args) +int rtas_ibm_suspend_me(struct rtas_args *args) { return -ENOSYS; } -- cgit v1.2.2 From 4e74fd7d0a6eda70f9356c113450182a844abcf1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 13 Sep 2010 09:47:40 +0000 Subject: powerpc: Use static const char arrays Signed-off-by: Joe Perches Reviewed-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4a65386995d7..47fbc56e9e1c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -1143,7 +1143,7 @@ static int virq_debug_show(struct seq_file *m, void *private) unsigned long flags; struct irq_desc *desc; const char *p; - char none[] = "none"; + static const char none[] = "none"; int i; seq_printf(m, "%-5s %-7s %-15s %s\n", "virq", "hwirq", -- cgit v1.2.2 From a655237fa2f9e4afe9949abe2c511432ab9537dd Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 4 Sep 2010 00:12:44 +0000 Subject: powerpc/irq.c: Add of_node_put to avoid memory leak In this case, a device_node structure is stored in another structure that is then freed without first decrementing the reference count of the device_node structure. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ expression x; identifier f; position p1,p2; @@ x@p1->f = \(of_find_node_by_path\|of_find_node_by_name\|of_find_node_by_phandle\|of_get_parent\|of_get_next_parent\|of_get_next_child\|of_find_compatible_node\|of_match_node\|of_find_node_by_type\|of_find_node_with_property\|of_find_matching_node\|of_parse_phandle\|of_node_get\)(...); ... when != of_node_put(x) kfree@p2(x) @script:python@ p1 << r.p1; p2 << r.p2; @@ cocci.print_main("call",p1) cocci.print_secs("free",p2) // Signed-off-by: Julia Lawall Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 47fbc56e9e1c..4002b48fd607 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -587,8 +587,10 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, * this will be fixed once slab is made available early * instead of the current cruft */ - if (mem_init_done) + if (mem_init_done) { + of_node_put(host->of_node); kfree(host); + } return NULL; } irq_map[0].host = host; -- cgit v1.2.2 From 1cb8e85a9d9da4192acfb5f70a80b0c5ce8c3e8f Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:05:45 +0000 Subject: powerpc/dma: Fix dma_iommu_dma_supported compare The table offset is in entries, each of which imply a dma address of an IOMMU page. Also, we should check the device can reach the whole IOMMU table. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma-iommu.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 37771a518119..6e54a0fd31aa 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -74,16 +74,17 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) { struct iommu_table *tbl = get_iommu_table_base(dev); - if (!tbl || tbl->it_offset > mask) { - printk(KERN_INFO - "Warning: IOMMU offset too big for device mask\n"); - if (tbl) - printk(KERN_INFO - "mask: 0x%08llx, table offset: 0x%08lx\n", - mask, tbl->it_offset); - else - printk(KERN_INFO "mask: 0x%08llx, table unavailable\n", - mask); + if (!tbl) { + dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx" + ", table unavailable\n", mask); + return 0; + } + + if ((tbl->it_offset + tbl->it_size) > (mask >> IOMMU_PAGE_SHIFT)) { + dev_info(dev, "Warning: IOMMU window too big for device mask\n"); + dev_info(dev, "mask: 0x%08llx, table end: 0x%08lx\n", + mask, (tbl->it_offset + tbl->it_size) << + IOMMU_PAGE_SHIFT); return 0; } else return 1; -- cgit v1.2.2 From ffa56e555a6e4c205e879636e6cd6104ce03421f Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:05:46 +0000 Subject: powerpc/dma: Fix check for direct DMA support The current check is wrong because it does not take the DMA offset intot account, and in the case of a driver which doesn't actually support 64bits would falsely report that device as working. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index f368c075c90b..cf02cad62d9a 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -90,7 +90,7 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask) /* Could be improved so platforms can set the limit in case * they have limited DMA windows */ - return mask >= (memblock_end_of_DRAM() - 1); + return mask >= get_dma_offset(dev) + (memblock_end_of_DRAM() - 1); #else return 1; #endif -- cgit v1.2.2 From edea8f6f48416d9a6fd1babb76c19cf05c802325 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:05:47 +0000 Subject: powerpc/vio: Use put_device() on device_register failure The kernel doc for device_register (and device_initialize) very clearly state to call put_device not kfree after calling, even on error. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index fa3469ddaef8..72db4b021762 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1254,8 +1254,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) if (device_register(&viodev->dev)) { printk(KERN_ERR "%s: failed to register device %s\n", __func__, dev_name(&viodev->dev)); - /* XXX free TCE table */ - kfree(viodev); + put_device(&viodev->dev); return NULL; } -- cgit v1.2.2 From 45848e0fc1fce399651b3f480bdeb82cc6d3d15a Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:05:48 +0000 Subject: powerpc/viobus: Free TCE table on device release Release the TCE table as the XXX suggests, except on FW_FEATURE_ISERIES, where the tables are allocated globally and reused. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 72db4b021762..d692989a4318 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1184,7 +1184,12 @@ EXPORT_SYMBOL(vio_unregister_driver); /* vio_dev refcount hit 0 */ static void __devinit vio_dev_release(struct device *dev) { - /* XXX should free TCE table */ + struct iommu_table *tbl = get_iommu_table_base(dev); + + /* iSeries uses a common table for all vio devices */ + if (!firmware_has_feature(FW_FEATURE_ISERIES) && tbl) + iommu_free_table(tbl, dev->of_node ? + dev->of_node->full_name : dev_name(dev)); of_node_put(dev->of_node); kfree(to_vio_dev(dev)); } -- cgit v1.2.2 From bc0df9ec4c014dac85c0358f56be4223bf0f3334 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:05:50 +0000 Subject: powerpc/pci: Cleanup device dma setup code Use set_dma_ops and remove unused oddly-named temp pointer sd. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 9021c4ad4bbd..10a44e68ef11 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1090,8 +1090,6 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) bus->number, bus->self ? pci_name(bus->self) : "PHB"); list_for_each_entry(dev, &bus->devices, bus_list) { - struct dev_archdata *sd = &dev->dev.archdata; - /* Cardbus can call us to add new devices to a bus, so ignore * those who are already fully discovered */ @@ -1107,7 +1105,7 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) set_dev_node(&dev->dev, pcibus_to_node(dev->bus)); /* Hook up default DMA ops */ - sd->dma_ops = pci_dma_ops; + set_dma_ops(&dev->dev, pci_dma_ops); set_dma_offset(&dev->dev, PCI_DRAM_OFFSET); /* Additional platform DMA/iommu setup */ -- cgit v1.2.2 From 4108d9ba9091c55cfb968d42dd7dcae9a098b876 Mon Sep 17 00:00:00 2001 From: matt mooney Date: Wed, 22 Sep 2010 20:51:09 +0000 Subject: powerpc/Makefiles: Change to new flag variables Replace EXTRA_CFLAGS with ccflags-y and EXTRA_AFLAGS with asflags-y. Signed-off-by: matt mooney Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vdso32/Makefile | 6 +++--- arch/powerpc/kernel/vdso64/Makefile | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 51ead52141bd..9a7946c41738 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -14,10 +14,10 @@ obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n -EXTRA_CFLAGS := -shared -fno-common -fno-builtin -EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) -EXTRA_AFLAGS := -D__VDSO32__ -s +asflags-y := -D__VDSO32__ -s obj-y += vdso32_wrapper.o extra-y += vdso32.lds diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 79da65d44a2a..8c500d8622e4 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -9,10 +9,10 @@ obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n -EXTRA_CFLAGS := -shared -fno-common -fno-builtin -EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) -EXTRA_AFLAGS := -D__VDSO64__ -s +asflags-y := -D__VDSO64__ -s obj-y += vdso64_wrapper.o extra-y += vdso64.lds -- cgit v1.2.2 From 6edc323db720c65b9e6a770b4bed98f251dd49f0 Mon Sep 17 00:00:00 2001 From: Tirumala Marri Date: Mon, 13 Sep 2010 13:26:11 +0000 Subject: powerpc/44x: Add support for the AMCC APM821xx SoC This patch adds CPU, device tree, defconfig and bluestone board support for APM821xx SoC. Signed-off-by: Tirumala R Marri Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cpu_setup_44x.S | 1 + arch/powerpc/kernel/cputable.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S index 7d606f89a839..e32b4a9a2c22 100644 --- a/arch/powerpc/kernel/cpu_setup_44x.S +++ b/arch/powerpc/kernel/cpu_setup_44x.S @@ -35,6 +35,7 @@ _GLOBAL(__setup_cpu_440grx) _GLOBAL(__setup_cpu_460ex) _GLOBAL(__setup_cpu_460gt) _GLOBAL(__setup_cpu_460sx) +_GLOBAL(__setup_cpu_apm821xx) mflr r4 bl __init_fpu_44x bl __fixup_440A_mcheck diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1f9123f412ec..b7ac795e5270 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -48,6 +48,7 @@ extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec); +extern void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec); extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec); @@ -1805,6 +1806,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, + { /* 464 in APM821xx */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x12C41C80, + .cpu_name = "APM821XX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_apm821xx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, { /* 476 core */ .pvr_mask = 0xffff0000, .pvr_value = 0x11a50000, -- cgit v1.2.2 From 55ec2fca3e99f83b5c674e9aba713d848392f6cc Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 20 Sep 2010 11:23:41 -0500 Subject: powerpc: export ppc_proc_freq and ppc_tb_freq as GPL symbols Export the global variable 'ppc_tb_freq', so that modules (like the Book-E watchdog driver) can use it. To maintain consistency, ppc_proc_freq is changed to a GPL-only export. This is okay, because any module that needs this symbol should be an actual Linux driver, which must be GPL-licensed. Signed-off-by: Timur Tabi Acked-by: Josh Boyer Signed-off-by: Kumar Gala --- arch/powerpc/kernel/time.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bcb738b9ff8c..644f9188d8e7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -161,8 +161,9 @@ extern struct timezone sys_tz; static long timezone_offset; unsigned long ppc_proc_freq; -EXPORT_SYMBOL(ppc_proc_freq); +EXPORT_SYMBOL_GPL(ppc_proc_freq); unsigned long ppc_tb_freq; +EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* -- cgit v1.2.2 From c71635d288ffd3bcdfb30308f681f9af34f0fc81 Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Thu, 16 Sep 2010 17:58:23 -0500 Subject: powerpc/kexec: make masking/disabling interrupts generic Right now just the kexec crash pathway turns turns off the interrupts. Pull that out and make a generic version for use elsewhere Signed-off-by: Matthew McClintock Signed-off-by: Kumar Gala --- arch/powerpc/kernel/crash.c | 13 +------------ arch/powerpc/kernel/machine_kexec.c | 24 ++++++++++++++++++++++++ arch/powerpc/kernel/machine_kexec_32.c | 4 ++++ 3 files changed, 29 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 4457382f8667..832c8c4db254 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -414,18 +414,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crash_kexec_wait_realmode(crashing_cpu); #endif - for_each_irq(i) { - struct irq_desc *desc = irq_to_desc(i); - - if (!desc || !desc->chip || !desc->chip->eoi) - continue; - - if (desc->status & IRQ_INPROGRESS) - desc->chip->eoi(i); - - if (!(desc->status & IRQ_DISABLED)) - desc->chip->shutdown(i); - } + machine_kexec_mask_interrupts(); /* * Call registered shutdown routines savely. Swap out diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index dd6c141f1662..df7e20c191cd 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -14,10 +14,34 @@ #include #include #include +#include + #include #include #include +void machine_kexec_mask_interrupts(void) { + unsigned int i; + + for_each_irq(i) { + struct irq_desc *desc = irq_to_desc(i); + + if (!desc || !desc->chip) + continue; + + if (desc->chip->eoi && + desc->status & IRQ_INPROGRESS) + desc->chip->eoi(i); + + if (desc->chip->mask) + desc->chip->mask(i); + + if (desc->chip->disable && + !(desc->status & IRQ_DISABLED)) + desc->chip->disable(i); + } +} + void machine_crash_shutdown(struct pt_regs *regs) { if (ppc_md.machine_crash_shutdown) diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c index ae63a964b858..e63f2e7d2efb 100644 --- a/arch/powerpc/kernel/machine_kexec_32.c +++ b/arch/powerpc/kernel/machine_kexec_32.c @@ -39,6 +39,10 @@ void default_machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + /* mask each interrupt so we are in a more sane state for the + * kexec kernel */ + machine_kexec_mask_interrupts(); + page_list = image->head; /* we need both effective and real address here */ -- cgit v1.2.2 From 2ed38b23597284cc96a97e295cb145a6202dfcd4 Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Tue, 31 Aug 2010 18:24:45 -0500 Subject: powerpc/fsl_booke: Add support to boot from core other than 0 First we check to see if we are the first core booting up. This is accomplished by comparing the boot_cpuid with -1, if it is we assume this is the first core coming up. Secondly, we need to update the initial thread info structure to reflect the actual cpu we are running on otherwise smp_processor_id() and related functions will return the default initialization value of the struct or 0. Signed-off-by: Matthew McClintock Signed-off-by: Kumar Gala --- arch/powerpc/kernel/head_fsl_booke.S | 10 ++++++++-- arch/powerpc/kernel/setup_32.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 4faeba247854..529b817f473b 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -152,8 +152,11 @@ _ENTRY(__early_start) /* Check to see if we're the second processor, and jump * to the secondary_start code if so */ - mfspr r24,SPRN_PIR - cmpwi r24,0 + lis r24, boot_cpuid@h + ori r24, r24, boot_cpuid@l + lwz r24, 0(r24) + cmpwi r24, -1 + mfspr r24,SPRN_PIR bne __secondary_start #endif @@ -175,6 +178,9 @@ _ENTRY(__early_start) li r0,0 stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + rlwinm r22,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + stw r24, TI_CPU(r22) + bl early_init #ifdef CONFIG_RELOCATABLE diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 93666f9cabf1..8da1632f9fe7 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -46,7 +46,7 @@ extern void bootx_init(unsigned long r4, unsigned long phys); -int boot_cpuid; +int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); int boot_cpuid_phys; -- cgit v1.2.2 From 3c4b76449b4efc1a1cbd0cade09486bbc8b56401 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 7 Oct 2010 17:05:08 -0500 Subject: powerpc: Fix compile error with paca code on ppc64e arch/powerpc/kernel/paca.c: In function 'allocate_lppacas': arch/powerpc/kernel/paca.c:111:1: error: parameter name omitted arch/powerpc/kernel/paca.c:111:1: error: parameter name omitted Signed-off-by: Kumar Gala --- arch/powerpc/kernel/paca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 1e068a46e6c3..cefc0df8f1e5 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -108,7 +108,7 @@ static void free_lppacas(void) #else -static inline void allocate_lppacas(int, unsigned long) { } +static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { } static inline void free_lppacas(void) { } #endif /* CONFIG_PPC_BOOK3S */ -- cgit v1.2.2 From 4490c06b581ad7d6392bb398960ef86dfd203a91 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 8 Oct 2010 08:32:11 -0500 Subject: powerpc/fsl-booke: Add support for FSL 64-bit e5500 core The new e5500 core is similar to the e500mc core but adds 64-bit support. We support running it in 32-bit mode as it is identical to the e500mc. Signed-off-by: Kumar Gala --- arch/powerpc/kernel/Makefile | 4 +++- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 15 +++++++++++++++ arch/powerpc/kernel/cputable.c | 28 +++++++++++++++++++++++++++- arch/powerpc/kernel/traps.c | 5 +++++ 4 files changed, 50 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1dda70129141..4ed076a4db24 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -55,7 +55,9 @@ obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_E500) += idle_e500.o +endif obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o @@ -67,7 +69,7 @@ endif obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_44x) += cpu_setup_44x.o -obj-$(CONFIG_FSL_BOOKE) += cpu_setup_fsl_booke.o dbell.o +obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o dbell.o obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o extra-y := head_$(CONFIG_WORD_SIZE).o diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 0adb50ad8031..894e64fa481e 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -51,6 +51,7 @@ _GLOBAL(__e500_dcache_setup) isync blr +#ifdef CONFIG_PPC32 _GLOBAL(__setup_cpu_e200) /* enable dedicated debug exception handling resources (Debug APU) */ mfspr r3,SPRN_HID0 @@ -72,3 +73,17 @@ _GLOBAL(__setup_cpu_e500mc) bl __setup_e500mc_ivors mtlr r4 blr +#endif +/* Right now, restore and setup are the same thing */ +_GLOBAL(__restore_cpu_e5500) +_GLOBAL(__setup_cpu_e5500) + mflr r4 + bl __e500_icache_setup + bl __e500_dcache_setup +#ifdef CONFIG_PPC_BOOK3E_64 + bl .__setup_base_ivors +#else + bl __setup_e500mc_ivors +#endif + mtlr r4 + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1f9123f412ec..cd5519133f86 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -66,6 +66,10 @@ extern void __restore_cpu_ppc970(void); extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power7(void); #endif /* CONFIG_PPC64 */ +#if defined(CONFIG_E500) +extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_e5500(void); +#endif /* CONFIG_E500 */ /* This table only contains "desktop" CPUs, it need to be filled with embedded * ones as well... @@ -1891,7 +1895,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "ppc5554", } #endif /* CONFIG_E200 */ +#endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 +#ifdef CONFIG_PPC32 { /* e500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80200000, @@ -1946,6 +1952,26 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce500mc", }, +#endif /* CONFIG_PPC32 */ + { /* e5500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80240000, + .cpu_name = "e5500", + .cpu_features = CPU_FTRS_E500MC, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | + MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e500mc", + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .cpu_setup = __setup_cpu_e5500, + .cpu_restore = __restore_cpu_e5500, + .machine_check = machine_check_e500mc, + .platform = "ppce5500", + }, +#ifdef CONFIG_PPC32 { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1960,8 +1986,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500, .platform = "powerpc", } -#endif /* CONFIG_E500 */ #endif /* CONFIG_PPC32 */ +#endif /* CONFIG_E500 */ #ifdef CONFIG_PPC_BOOK3E_64 { /* This is a default entry to get going, to be replaced by diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a45a63c3a0c7..1b2cdc8eec90 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -538,6 +538,11 @@ int machine_check_e500(struct pt_regs *regs) return 0; } + +int machine_check_generic(struct pt_regs *regs) +{ + return 0; +} #elif defined(CONFIG_E200) int machine_check_e200(struct pt_regs *regs) { -- cgit v1.2.2 From 55fd766b5fad8240b7a6e994b5779a46d28f73d4 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 16 Oct 2009 18:48:40 -0500 Subject: powerpc/fsl-booke64: Use TLB CAMs to cover linear mapping on FSL 64-bit chips On Freescale parts typically have TLB array for large mappings that we can bolt the linear mapping into. We utilize the code that already exists on PPC32 on the 64-bit side to setup the linear mapping to be cover by bolted TLB entries. We utilize a quarter of the variable size TLB array for this purpose. Additionally, we limit the amount of memory to what we can cover via bolted entries so we don't get secondary faults in the TLB miss handlers. We should fix this limitation in the future. Signed-off-by: Kumar Gala --- arch/powerpc/kernel/asm-offsets.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c63494090854..c3e01945ad4f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -61,7 +61,7 @@ #endif #endif -#if defined(CONFIG_FSL_BOOKE) +#if defined(CONFIG_PPC_FSL_BOOK3E) #include "../mm/mmu_decl.h" #endif @@ -470,7 +470,7 @@ int main(void) DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_FSL_BOOK3E DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0)); DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1)); -- cgit v1.2.2 From 6038f373a3dc1f1c26496e60b6c40b164716f07e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 15 Aug 2010 18:52:59 +0200 Subject: llseek: automatically add .llseek fop All file_operations should get a .llseek operation so we can make nonseekable_open the default for future file operations without a .llseek pointer. The three cases that we can automatically detect are no_llseek, seq_lseek and default_llseek. For cases where we can we can automatically prove that the file offset is always ignored, we use noop_llseek, which maintains the current behavior of not returning an error from a seek. New drivers should normally not use noop_llseek but instead use no_llseek and call nonseekable_open at open time. Existing drivers can be converted to do the same when the maintainer knows for certain that no user code relies on calling seek on the device file. The generated code is often incorrectly indented and right now contains comments that clarify for each added line why a specific variant was chosen. In the version that gets submitted upstream, the comments will be gone and I will manually fix the indentation, because there does not seem to be a way to do that using coccinelle. Some amount of new code is currently sitting in linux-next that should get the same modifications, which I will do at the end of the merge window. Many thanks to Julia Lawall for helping me learn to write a semantic patch that does all this. ===== begin semantic patch ===== // This adds an llseek= method to all file operations, // as a preparation for making no_llseek the default. // // The rules are // - use no_llseek explicitly if we do nonseekable_open // - use seq_lseek for sequential files // - use default_llseek if we know we access f_pos // - use noop_llseek if we know we don't access f_pos, // but we still want to allow users to call lseek // @ open1 exists @ identifier nested_open; @@ nested_open(...) { <+... nonseekable_open(...) ...+> } @ open exists@ identifier open_f; identifier i, f; identifier open1.nested_open; @@ int open_f(struct inode *i, struct file *f) { <+... ( nonseekable_open(...) | nested_open(...) ) ...+> } @ read disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ read_no_fpos disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { ... when != off } @ write @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ write_no_fpos @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { ... when != off } @ fops0 @ identifier fops; @@ struct file_operations fops = { ... }; @ has_llseek depends on fops0 @ identifier fops0.fops; identifier llseek_f; @@ struct file_operations fops = { ... .llseek = llseek_f, ... }; @ has_read depends on fops0 @ identifier fops0.fops; identifier read_f; @@ struct file_operations fops = { ... .read = read_f, ... }; @ has_write depends on fops0 @ identifier fops0.fops; identifier write_f; @@ struct file_operations fops = { ... .write = write_f, ... }; @ has_open depends on fops0 @ identifier fops0.fops; identifier open_f; @@ struct file_operations fops = { ... .open = open_f, ... }; // use no_llseek if we call nonseekable_open //////////////////////////////////////////// @ nonseekable1 depends on !has_llseek && has_open @ identifier fops0.fops; identifier nso ~= "nonseekable_open"; @@ struct file_operations fops = { ... .open = nso, ... +.llseek = no_llseek, /* nonseekable */ }; @ nonseekable2 depends on !has_llseek @ identifier fops0.fops; identifier open.open_f; @@ struct file_operations fops = { ... .open = open_f, ... +.llseek = no_llseek, /* open uses nonseekable */ }; // use seq_lseek for sequential files ///////////////////////////////////// @ seq depends on !has_llseek @ identifier fops0.fops; identifier sr ~= "seq_read"; @@ struct file_operations fops = { ... .read = sr, ... +.llseek = seq_lseek, /* we have seq_read */ }; // use default_llseek if there is a readdir /////////////////////////////////////////// @ fops1 depends on !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier readdir_e; @@ // any other fop is used that changes pos struct file_operations fops = { ... .readdir = readdir_e, ... +.llseek = default_llseek, /* readdir is present */ }; // use default_llseek if at least one of read/write touches f_pos ///////////////////////////////////////////////////////////////// @ fops2 depends on !fops1 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read.read_f; @@ // read fops use offset struct file_operations fops = { ... .read = read_f, ... +.llseek = default_llseek, /* read accesses f_pos */ }; @ fops3 depends on !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, ... + .llseek = default_llseek, /* write accesses f_pos */ }; // Use noop_llseek if neither read nor write accesses f_pos /////////////////////////////////////////////////////////// @ fops4 depends on !fops1 && !fops2 && !fops3 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; identifier write_no_fpos.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, .read = read_f, ... +.llseek = noop_llseek, /* read and write both use no f_pos */ }; @ depends on has_write && !has_read && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write_no_fpos.write_f; @@ struct file_operations fops = { ... .write = write_f, ... +.llseek = noop_llseek, /* write uses no f_pos */ }; @ depends on has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; @@ struct file_operations fops = { ... .read = read_f, ... +.llseek = noop_llseek, /* read uses no f_pos */ }; @ depends on !has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; @@ struct file_operations fops = { ... +.llseek = noop_llseek, /* no read or write fn */ }; ===== End semantic patch ===== Signed-off-by: Arnd Bergmann Cc: Julia Lawall Cc: Christoph Hellwig --- arch/powerpc/kernel/lparcfg.c | 1 + arch/powerpc/kernel/rtas_flash.c | 3 +++ arch/powerpc/kernel/rtasd.c | 1 + 3 files changed, 5 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 50362b6ef6e9..b1dd962e247e 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -780,6 +780,7 @@ static const struct file_operations lparcfg_fops = { .write = lparcfg_write, .open = lparcfg_open, .release = single_release, + .llseek = seq_lseek, }; static int __init lparcfg_init(void) diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 67a84d8f118d..2b442e6c21e6 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -716,6 +716,7 @@ static const struct file_operations rtas_flash_operations = { .write = rtas_flash_write, .open = rtas_excl_open, .release = rtas_flash_release, + .llseek = default_llseek, }; static const struct file_operations manage_flash_operations = { @@ -724,6 +725,7 @@ static const struct file_operations manage_flash_operations = { .write = manage_flash_write, .open = rtas_excl_open, .release = rtas_excl_release, + .llseek = default_llseek, }; static const struct file_operations validate_flash_operations = { @@ -732,6 +734,7 @@ static const struct file_operations validate_flash_operations = { .write = validate_flash_write, .open = rtas_excl_open, .release = validate_flash_release, + .llseek = default_llseek, }; static int __init rtas_flash_init(void) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 638883e23e3a..0438f819fe6b 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -354,6 +354,7 @@ static const struct file_operations proc_rtas_log_operations = { .poll = rtas_log_poll, .open = rtas_log_open, .release = rtas_log_release, + .llseek = noop_llseek, }; static int enable_surveillance(int timeout) -- cgit v1.2.2 From e360adbe29241a0194e10e20595360dd7b98a2b3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 14:01:34 +0800 Subject: irq_work: Add generic hardirq context callbacks Provide a mechanism that allows running code in IRQ context. It is most useful for NMI code that needs to interact with the rest of the system -- like wakeup a task to drain buffers. Perf currently has such a mechanism, so extract that and provide it as a generic feature, independent of perf so that others may also benefit. The IRQ context callback is generated through self-IPIs where possible, or on architectures like powerpc the decrementer (the built-in timer facility) is set to generate an interrupt immediately. Architectures that don't have anything like this get to do with a callback from the timer tick. These architectures can call irq_work_run() at the tail of any IRQ handlers that might enqueue such work (like the perf IRQ handler) to avoid undue latencies in processing the work. Signed-off-by: Peter Zijlstra Acked-by: Kyle McMartin Acked-by: Martin Schwidefsky [ various fixes ] Signed-off-by: Huang Ying LKML-Reference: <1287036094.7768.291.camel@yhuang-dev> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/time.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 8533b3b83f5d..54888eb10c3b 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include @@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#ifdef CONFIG_PERF_EVENTS +#ifdef CONFIG_IRQ_WORK /* * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... */ #ifdef CONFIG_PPC64 -static inline unsigned long test_perf_event_pending(void) +static inline unsigned long test_irq_work_pending(void) { unsigned long x; asm volatile("lbz %0,%1(13)" : "=r" (x) - : "i" (offsetof(struct paca_struct, perf_event_pending))); + : "i" (offsetof(struct paca_struct, irq_work_pending))); return x; } -static inline void set_perf_event_pending_flag(void) +static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : "r" (1), - "i" (offsetof(struct paca_struct, perf_event_pending))); + "i" (offsetof(struct paca_struct, irq_work_pending))); } -static inline void clear_perf_event_pending(void) +static inline void clear_irq_work_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (0), - "i" (offsetof(struct paca_struct, perf_event_pending))); + "i" (offsetof(struct paca_struct, irq_work_pending))); } #else /* 32-bit */ -DEFINE_PER_CPU(u8, perf_event_pending); +DEFINE_PER_CPU(u8, irq_work_pending); -#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 -#define test_perf_event_pending() __get_cpu_var(perf_event_pending) -#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 +#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 +#define test_irq_work_pending() __get_cpu_var(irq_work_pending) +#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 #endif /* 32 vs 64 bit */ -void set_perf_event_pending(void) +void set_irq_work_pending(void) { preempt_disable(); - set_perf_event_pending_flag(); + set_irq_work_pending_flag(); set_dec(1); preempt_enable(); } -#else /* CONFIG_PERF_EVENTS */ +#else /* CONFIG_IRQ_WORK */ -#define test_perf_event_pending() 0 -#define clear_perf_event_pending() +#define test_irq_work_pending() 0 +#define clear_irq_work_pending() -#endif /* CONFIG_PERF_EVENTS */ +#endif /* CONFIG_IRQ_WORK */ /* * For iSeries shared processors, we have to let the hypervisor @@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs) calculate_steal_time(); - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); } #ifdef CONFIG_PPC_ISERIES -- cgit v1.2.2 From 57fa7214330be2e292ddb1402834ff0b221ef29a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 19 Oct 2010 16:55:35 +1100 Subject: perf, powerpc: Fix power_pmu_event_init to not use event->ctx Commit c3f00c70 ("perf: Separate find_get_context() from event initialization") changed the generic perf_event code to call perf_event_alloc, which calls the arch-specific event_init code, before looking up the context for the new event. Unfortunately, power_pmu_event_init uses event->ctx->task to see whether the new event is a per-task event or a system-wide event, and thus crashes since event->ctx is NULL at the point where power_pmu_event_init gets called. (The reason it needs to know whether it is a per-task event is because there are some hardware events on Power systems which only count when the processor is not idle, and there are some fixed-function counters which count such events. For example, the "run cycles" event counts cycles when the processor is not idle. If the user asks to count cycles, we can use "run cycles" if this is a per-task event, since the processor is running when the task is running, by definition. We can't use "run cycles" if the user asks for "cycles" on a system-wide counter.) Fortunately the information we need is in the event->attach_state field, so we just use that instead. Signed-off-by: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Peter Zijlstra Cc: Frederic Weisbecker LKML-Reference: <20101019055535.GA10398@drongo> Signed-off-by: Ingo Molnar Reported-by: Alexey Kardashevskiy Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 9cb4924b6c07..3129c855933c 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1092,7 +1092,7 @@ static int power_pmu_event_init(struct perf_event *event) * XXX we should check if the task is an idle task. */ flags = 0; - if (event->ctx->task) + if (event->attach_state & PERF_ATTACH_TASK) flags |= PPMU_ONLY_COUNT_RUN; /* -- cgit v1.2.2 From 7096d0422153ffcc2264eef652fc3a7bca3e6d3c Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 20 Oct 2010 11:45:13 -0600 Subject: of/device: Rework to use common platform_device_alloc() for allocating devices The current code allocates and manages platform_devices created from the device tree manually. It also uses an unsafe shortcut for allocating the platform_device and the resource table at the same time. (which I added in the last rework; sorry). This patch refactors the code to use platform_device_alloc() for allocating new devices. This reduces the amount of custom code implemented by of_platform, eliminates the unsafe alloc trick, and has the side benefit of letting the platform_bus code manage freeing the device data and resources when the device is freed. Signed-off-by: Grant Likely Cc: Benjamin Herrenschmidt Cc: Greg Kroah-Hartman Cc: "David S. Miller" Cc: Michal Simek --- arch/powerpc/kernel/ibmebus.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 9b626cfffce1..f62efdfd1769 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -162,13 +162,10 @@ static int ibmebus_create_device(struct device_node *dn) dev->dev.bus = &ibmebus_bus_type; dev->dev.archdata.dma_ops = &ibmebus_dma_ops; - ret = of_device_register(dev); - if (ret) { - of_device_free(dev); - return ret; - } - - return 0; + ret = of_device_add(dev); + if (ret) + platform_device_put(dev); + return ret; } static int ibmebus_create_devices(const struct of_device_id *matches) -- cgit v1.2.2 From 32c97689c46b272302053778f1a6c2facb0e220c Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 20 Oct 2010 11:45:14 -0600 Subject: of/flattree: Eliminate need to provide early_init_dt_scan_chosen_arch This patch refactors the early init parsing of the chosen node so that architectures aren't forced to provide an empty implementation of early_init_dt_scan_chosen_arch. Instead, if an architecture wants to do something different, it can either use a wrapper function around early_init_dt_scan_chosen(), or it can replace it altogether. This patch was written in preparation to adding device tree support to both x86 ad MIPS. Signed-off-by: Grant Likely Tested-by: David Daney --- arch/powerpc/kernel/prom.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fed9bf6187d1..e296aae63c60 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -363,10 +363,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -void __init early_init_dt_scan_chosen_arch(unsigned long node) +int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, + int depth, void *data) { unsigned long *lprop; + /* Use common scan routine to determine if this is the chosen node */ + if (early_init_dt_scan_chosen(node, uname, depth, data) == 0) + return 0; + #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) @@ -398,6 +403,9 @@ void __init early_init_dt_scan_chosen_arch(unsigned long node) if (lprop) crashk_res.end = crashk_res.start + *lprop - 1; #endif + + /* break now */ + return 1; } #ifdef CONFIG_PPC_PSERIES @@ -679,7 +687,7 @@ void __init early_init_devtree(void *params) * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - of_scan_flat_dt(early_init_dt_scan_chosen, NULL); + of_scan_flat_dt(early_init_dt_scan_chosen_ppc, NULL); /* Scan memory nodes and rebuild MEMBLOCKs */ memblock_init(); -- cgit v1.2.2 From 96bc451a153297bf1f99ef2d633d512ea349ae7a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:42 +0200 Subject: KVM: PPC: Introduce shared page For transparent variable sharing between the hypervisor and guest, I introduce a shared page. This shared page will contain all the registers the guest can read and write safely without exiting guest context. This patch only implements the stubs required for the basic structure of the shared page. The actual register moving follows. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/asm-offsets.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1c0607ddccc0..60e7db4c13af 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -400,6 +400,7 @@ int main(void) DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); + DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); /* book3s */ #ifdef CONFIG_PPC_BOOK3S -- cgit v1.2.2 From 666e7252a15b7fc4a116e65deaf6da5e4ce660e3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:43 +0200 Subject: KVM: PPC: Convert MSR to shared page One of the most obvious registers to share with the guest directly is the MSR. The MSR contains the "interrupts enabled" flag which the guest has to toggle in critical sections. So in order to bring the overhead of interrupt en- and disabling down, let's put msr into the shared page. Keep in mind that even though you can fully read its contents, writing to it doesn't always update all state. There are a few safe fields that don't require hypervisor interaction. See the documentation for a list of MSR bits that are safe to be set from inside the guest. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/asm-offsets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 60e7db4c13af..1221bcdff52f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -394,13 +394,13 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); - DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); + DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); /* book3s */ #ifdef CONFIG_PPC_BOOK3S -- cgit v1.2.2 From 2a342ed57756ad5d8af5456959433884367e5ab2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:48 +0200 Subject: KVM: PPC: Implement hypervisor interface To communicate with KVM directly we need to plumb some sort of interface between the guest and KVM. Usually those interfaces use hypercalls. This hypercall implementation is described in the last patch of the series in a special documentation file. Please read that for further information. This patch implements stubs to handle KVM PPC hypercalls on the host and guest side alike. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/Makefile | 2 ++ arch/powerpc/kernel/kvm.c | 68 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 arch/powerpc/kernel/kvm.c (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1dda70129141..3a6955dc7191 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -127,6 +127,8 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) obj-y += ppc_save_regs.o endif +obj-$(CONFIG_KVM_GUEST) += kvm.o + # Disable GCOV in odd or sensitive code GCOV_PROFILE_prom_init.o := n GCOV_PROFILE_ftrace.o := n diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c new file mode 100644 index 000000000000..4f85505e4653 --- /dev/null +++ b/arch/powerpc/kernel/kvm.c @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Alexander Graf + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + unsigned long register r0 asm("r0"); + unsigned long register r3 asm("r3") = in[0]; + unsigned long register r4 asm("r4") = in[1]; + unsigned long register r5 asm("r5") = in[2]; + unsigned long register r6 asm("r6") = in[3]; + unsigned long register r7 asm("r7") = in[4]; + unsigned long register r8 asm("r8") = in[5]; + unsigned long register r9 asm("r9") = in[6]; + unsigned long register r10 asm("r10") = in[7]; + unsigned long register r11 asm("r11") = nr; + unsigned long register r12 asm("r12"); + + asm volatile("bl kvm_hypercall_start" + : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), + "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), + "=r"(r12) + : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8), + "r"(r9), "r"(r10), "r"(r11) + : "memory", "cc", "xer", "ctr", "lr"); + + out[0] = r4; + out[1] = r5; + out[2] = r6; + out[3] = r7; + out[4] = r8; + out[5] = r9; + out[6] = r10; + out[7] = r11; + + return r3; +} +EXPORT_SYMBOL_GPL(kvm_hypercall); -- cgit v1.2.2 From d17051cb8d223dffd6bb847b0565ef1654f8e0e1 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:57 +0200 Subject: KVM: PPC: Generic KVM PV guest support We have all the hypervisor pieces in place now, but the guest parts are still missing. This patch implements basic awareness of KVM when running Linux as guest. It doesn't do anything with it yet though. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/asm-offsets.c | 15 +++++++++++++++ arch/powerpc/kernel/kvm.c | 3 +++ arch/powerpc/kernel/kvm_emul.S | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/kvm_emul.S (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 3a6955dc7191..be257b0aae36 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -127,7 +127,7 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) obj-y += ppc_save_regs.o endif -obj-$(CONFIG_KVM_GUEST) += kvm.o +obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o # Disable GCOV in odd or sensitive code GCOV_PROFILE_prom_init.o := n diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1221bcdff52f..37486cafb69d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -465,6 +465,21 @@ int main(void) DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); #endif /* CONFIG_PPC_BOOK3S */ #endif + +#ifdef CONFIG_KVM_GUEST + DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared, + scratch1)); + DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared, + scratch2)); + DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared, + scratch3)); + DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared, + int_pending)); + DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); + DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared, + critical)); +#endif + #ifdef CONFIG_44x DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 4f85505e4653..a5ece71ecdd2 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -30,6 +30,9 @@ #include #include +#define KVM_MAGIC_PAGE (-4096L) +#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) + unsigned long kvm_hypercall(unsigned long *in, unsigned long *out, unsigned long nr) diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S new file mode 100644 index 000000000000..5cfa2aeeecb0 --- /dev/null +++ b/arch/powerpc/kernel/kvm_emul.S @@ -0,0 +1,36 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2010 + * + * Authors: Alexander Graf + */ + +#include +#include +#include +#include +#include + +/* Hypercall entry point. Will be patched with device tree instructions. */ + +.global kvm_hypercall_start +kvm_hypercall_start: + li r3, -1 + nop + nop + nop + blr + +#define KVM_MAGIC_PAGE (-4096) -- cgit v1.2.2 From 73a18109829e7696226a9fd4062d339e7c6ee130 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:58 +0200 Subject: KVM: PPC: KVM PV guest stubs We will soon start and replace instructions from the text section with other, paravirtualized versions. To ease the readability of those patches I split out the generic looping and magic page mapping code out. This patch still only contains stubs. But at least it loops through the text section :). Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index a5ece71ecdd2..e93366fbbd21 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -33,6 +33,62 @@ #define KVM_MAGIC_PAGE (-4096L) #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) +#define KVM_MASK_RT 0x03e00000 + +static bool kvm_patching_worked = true; + +static inline void kvm_patch_ins(u32 *inst, u32 new_inst) +{ + *inst = new_inst; + flush_icache_range((ulong)inst, (ulong)inst + 4); +} + +static void kvm_map_magic_page(void *data) +{ + kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, + KVM_MAGIC_PAGE, /* Physical Address */ + KVM_MAGIC_PAGE); /* Effective Address */ +} + +static void kvm_check_ins(u32 *inst) +{ + u32 _inst = *inst; + u32 inst_no_rt = _inst & ~KVM_MASK_RT; + u32 inst_rt = _inst & KVM_MASK_RT; + + switch (inst_no_rt) { + } + + switch (_inst) { + } +} + +static void kvm_use_magic_page(void) +{ + u32 *p; + u32 *start, *end; + u32 tmp; + + /* Tell the host to map the magic page to -4096 on all CPUs */ + on_each_cpu(kvm_map_magic_page, NULL, 1); + + /* Quick self-test to see if the mapping works */ + if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) { + kvm_patching_worked = false; + return; + } + + /* Now loop through all code and find instructions */ + start = (void*)_stext; + end = (void*)_etext; + + for (p = start; p < end; p++) + kvm_check_ins(p); + + printk(KERN_INFO "KVM: Live patching for a fast VM %s\n", + kvm_patching_worked ? "worked" : "failed"); +} + unsigned long kvm_hypercall(unsigned long *in, unsigned long *out, unsigned long nr) @@ -69,3 +125,42 @@ unsigned long kvm_hypercall(unsigned long *in, return r3; } EXPORT_SYMBOL_GPL(kvm_hypercall); + +static int kvm_para_setup(void) +{ + extern u32 kvm_hypercall_start; + struct device_node *hyper_node; + u32 *insts; + int len, i; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return -1; + + insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len); + if (len % 4) + return -1; + if (len > (4 * 4)) + return -1; + + for (i = 0; i < (len / 4); i++) + kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]); + + return 0; +} + +static int __init kvm_guest_init(void) +{ + if (!kvm_para_available()) + return 0; + + if (kvm_para_setup()) + return 0; + + if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) + kvm_use_magic_page(); + + return 0; +} + +postcore_initcall(kvm_guest_init); -- cgit v1.2.2 From d1293c927568f5b5b8dd3fa263a98683cf8556dc Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:59 +0200 Subject: KVM: PPC: PV instructions to loads and stores Some instructions can simply be replaced by load and store instructions to or from the magic page. This patch replaces often called instructions that fall into the above category. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index e93366fbbd21..9ec572c4d2a5 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -33,7 +33,34 @@ #define KVM_MAGIC_PAGE (-4096L) #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) +#define KVM_INST_LWZ 0x80000000 +#define KVM_INST_STW 0x90000000 +#define KVM_INST_LD 0xe8000000 +#define KVM_INST_STD 0xf8000000 +#define KVM_INST_NOP 0x60000000 +#define KVM_INST_B 0x48000000 +#define KVM_INST_B_MASK 0x03ffffff +#define KVM_INST_B_MAX 0x01ffffff + #define KVM_MASK_RT 0x03e00000 +#define KVM_INST_MFMSR 0x7c0000a6 +#define KVM_INST_MFSPR_SPRG0 0x7c1042a6 +#define KVM_INST_MFSPR_SPRG1 0x7c1142a6 +#define KVM_INST_MFSPR_SPRG2 0x7c1242a6 +#define KVM_INST_MFSPR_SPRG3 0x7c1342a6 +#define KVM_INST_MFSPR_SRR0 0x7c1a02a6 +#define KVM_INST_MFSPR_SRR1 0x7c1b02a6 +#define KVM_INST_MFSPR_DAR 0x7c1302a6 +#define KVM_INST_MFSPR_DSISR 0x7c1202a6 + +#define KVM_INST_MTSPR_SPRG0 0x7c1043a6 +#define KVM_INST_MTSPR_SPRG1 0x7c1143a6 +#define KVM_INST_MTSPR_SPRG2 0x7c1243a6 +#define KVM_INST_MTSPR_SPRG3 0x7c1343a6 +#define KVM_INST_MTSPR_SRR0 0x7c1a03a6 +#define KVM_INST_MTSPR_SRR1 0x7c1b03a6 +#define KVM_INST_MTSPR_DAR 0x7c1303a6 +#define KVM_INST_MTSPR_DSISR 0x7c1203a6 static bool kvm_patching_worked = true; @@ -43,6 +70,34 @@ static inline void kvm_patch_ins(u32 *inst, u32 new_inst) flush_icache_range((ulong)inst, (ulong)inst + 4); } +static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_LWZ | rt | ((addr + 4) & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt) +{ + kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff)); +} + +static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_STW | rt | ((addr + 4) & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt) +{ + kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc)); +} + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -57,6 +112,60 @@ static void kvm_check_ins(u32 *inst) u32 inst_rt = _inst & KVM_MASK_RT; switch (inst_no_rt) { + /* Loads */ + case KVM_INST_MFMSR: + kvm_patch_ins_ld(inst, magic_var(msr), inst_rt); + break; + case KVM_INST_MFSPR_SPRG0: + kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt); + break; + case KVM_INST_MFSPR_SPRG1: + kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt); + break; + case KVM_INST_MFSPR_SPRG2: + kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt); + break; + case KVM_INST_MFSPR_SPRG3: + kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt); + break; + case KVM_INST_MFSPR_SRR0: + kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt); + break; + case KVM_INST_MFSPR_SRR1: + kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt); + break; + case KVM_INST_MFSPR_DAR: + kvm_patch_ins_ld(inst, magic_var(dar), inst_rt); + break; + case KVM_INST_MFSPR_DSISR: + kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt); + break; + + /* Stores */ + case KVM_INST_MTSPR_SPRG0: + kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt); + break; + case KVM_INST_MTSPR_SPRG1: + kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt); + break; + case KVM_INST_MTSPR_SPRG2: + kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt); + break; + case KVM_INST_MTSPR_SPRG3: + kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt); + break; + case KVM_INST_MTSPR_SRR0: + kvm_patch_ins_std(inst, magic_var(srr0), inst_rt); + break; + case KVM_INST_MTSPR_SRR1: + kvm_patch_ins_std(inst, magic_var(srr1), inst_rt); + break; + case KVM_INST_MTSPR_DAR: + kvm_patch_ins_std(inst, magic_var(dar), inst_rt); + break; + case KVM_INST_MTSPR_DSISR: + kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); + break; } switch (_inst) { -- cgit v1.2.2 From d1290b15e7f139e24150cc6e6d8e904214359e8a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:00 +0200 Subject: KVM: PPC: PV tlbsync to nop With our current MMU scheme we don't need to know about the tlbsync instruction. So we can just nop it out. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 9ec572c4d2a5..3258922cc22c 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -62,6 +62,8 @@ #define KVM_INST_MTSPR_DAR 0x7c1303a6 #define KVM_INST_MTSPR_DSISR 0x7c1203a6 +#define KVM_INST_TLBSYNC 0x7c00046c + static bool kvm_patching_worked = true; static inline void kvm_patch_ins(u32 *inst, u32 new_inst) @@ -98,6 +100,11 @@ static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt) kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc)); } +static void kvm_patch_ins_nop(u32 *inst) +{ + kvm_patch_ins(inst, KVM_INST_NOP); +} + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -166,6 +173,11 @@ static void kvm_check_ins(u32 *inst) case KVM_INST_MTSPR_DSISR: kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); break; + + /* Nops */ + case KVM_INST_TLBSYNC: + kvm_patch_ins_nop(inst); + break; } switch (_inst) { -- cgit v1.2.2 From 2d4f567103ff5a931e773f2e356b4eb303115deb Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:01 +0200 Subject: KVM: PPC: Introduce kvm_tmp framework We will soon require more sophisticated methods to replace single instructions with multiple instructions. We do that by branching to a memory region where we write replacement code for the instruction to. This region needs to be within 32 MB of the patched instruction though, because that's the furthest we can jump with immediate branches. So we keep 1MB of free space around in bss. After we're done initing we can just tell the mm system that the unused pages are free, but until then we have enough space to fit all our code in. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 3258922cc22c..926f93fd722d 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -65,6 +65,8 @@ #define KVM_INST_TLBSYNC 0x7c00046c static bool kvm_patching_worked = true; +static char kvm_tmp[1024 * 1024]; +static int kvm_tmp_index; static inline void kvm_patch_ins(u32 *inst, u32 new_inst) { @@ -105,6 +107,23 @@ static void kvm_patch_ins_nop(u32 *inst) kvm_patch_ins(inst, KVM_INST_NOP); } +static u32 *kvm_alloc(int len) +{ + u32 *p; + + if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) { + printk(KERN_ERR "KVM: No more space (%d + %d)\n", + kvm_tmp_index, len); + kvm_patching_worked = false; + return NULL; + } + + p = (void*)&kvm_tmp[kvm_tmp_index]; + kvm_tmp_index += len; + + return p; +} + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -270,17 +289,36 @@ static int kvm_para_setup(void) return 0; } +static __init void kvm_free_tmp(void) +{ + unsigned long start, end; + + start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK; + end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; + + /* Free the tmp space we don't need */ + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + init_page_count(virt_to_page(start)); + free_page(start); + totalram_pages++; + } +} + static int __init kvm_guest_init(void) { if (!kvm_para_available()) - return 0; + goto free_tmp; if (kvm_para_setup()) - return 0; + goto free_tmp; if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) kvm_use_magic_page(); +free_tmp: + kvm_free_tmp(); + return 0; } -- cgit v1.2.2 From 71ee8e34fe26252b11668a95708783ec9c58cbda Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:02 +0200 Subject: KVM: PPC: Introduce branch patching helper We will need to patch several instruction streams over to a different code path, so we need a way to patch a single instruction with a branch somewhere else. This patch adds a helper to facilitate this patching. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 926f93fd722d..239a70d750a2 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -107,6 +107,20 @@ static void kvm_patch_ins_nop(u32 *inst) kvm_patch_ins(inst, KVM_INST_NOP); } +static void kvm_patch_ins_b(u32 *inst, int addr) +{ +#ifdef CONFIG_RELOCATABLE + /* On relocatable kernels interrupts handlers and our code + can be in different regions, so we don't patch them */ + + extern u32 __end_interrupts; + if ((ulong)inst < (ulong)&__end_interrupts) + return; +#endif + + kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK)); +} + static u32 *kvm_alloc(int len) { u32 *p; -- cgit v1.2.2 From 92234722ed631f472f1c4d79d35d8e5cf6910002 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:03 +0200 Subject: KVM: PPC: PV assembler helpers When we hook an instruction we need to make sure we don't clobber any of the registers at that point. So we write them out to scratch space in the magic page. To make sure we don't fall into a race with another piece of hooked code, we need to disable interrupts. To make the later patches and code in general easier readable, let's introduce a set of defines that save and restore r30, r31 and cr. Let's also define some helpers to read the lower 32 bits of a 64 bit field on 32 bit systems. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm_emul.S | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index 5cfa2aeeecb0..1dac72dd6f6e 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -34,3 +34,33 @@ kvm_hypercall_start: blr #define KVM_MAGIC_PAGE (-4096) + +#ifdef CONFIG_64BIT +#define LL64(reg, offs, reg2) ld reg, (offs)(reg2) +#define STL64(reg, offs, reg2) std reg, (offs)(reg2) +#else +#define LL64(reg, offs, reg2) lwz reg, (offs + 4)(reg2) +#define STL64(reg, offs, reg2) stw reg, (offs + 4)(reg2) +#endif + +#define SCRATCH_SAVE \ + /* Enable critical section. We are critical if \ + shared->critical == r1 */ \ + STL64(r1, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); \ + \ + /* Save state */ \ + PPC_STL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \ + PPC_STL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \ + mfcr r31; \ + stw r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); + +#define SCRATCH_RESTORE \ + /* Restore state */ \ + PPC_LL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \ + lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); \ + mtcr r30; \ + PPC_LL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \ + \ + /* Disable critical section. We are critical if \ + shared->critical == r1 and r2 is always != r1 */ \ + STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); -- cgit v1.2.2 From 819a63dc792b0888edd3eda306a9e1e049dcbb1c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:04 +0200 Subject: KVM: PPC: PV mtmsrd L=1 The PowerPC ISA has a special instruction for mtmsr that only changes the EE and RI bits, namely the L=1 form. Since that one is reasonably often occuring and simple to implement, let's go with this first. Writing EE=0 is always just a store. Doing EE=1 also requires us to check for pending interrupts and if necessary exit back to the hypervisor. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 45 +++++++++++++++++++++++++++++++++ arch/powerpc/kernel/kvm_emul.S | 56 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 239a70d750a2..717ab0dded25 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -63,6 +63,7 @@ #define KVM_INST_MTSPR_DSISR 0x7c1203a6 #define KVM_INST_TLBSYNC 0x7c00046c +#define KVM_INST_MTMSRD_L1 0x7c010164 static bool kvm_patching_worked = true; static char kvm_tmp[1024 * 1024]; @@ -138,6 +139,43 @@ static u32 *kvm_alloc(int len) return p; } +extern u32 kvm_emulate_mtmsrd_branch_offs; +extern u32 kvm_emulate_mtmsrd_reg_offs; +extern u32 kvm_emulate_mtmsrd_len; +extern u32 kvm_emulate_mtmsrd[]; + +static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtmsrd_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsrd_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtmsrd, kvm_emulate_mtmsrd_len * 4); + p[kvm_emulate_mtmsrd_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_mtmsrd_reg_offs] |= rt; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsrd_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -211,6 +249,13 @@ static void kvm_check_ins(u32 *inst) case KVM_INST_TLBSYNC: kvm_patch_ins_nop(inst); break; + + /* Rewrites */ + case KVM_INST_MTMSRD_L1: + /* We use r30 and r31 during the hook */ + if (get_rt(inst_rt) < 30) + kvm_patch_ins_mtmsrd(inst, inst_rt); + break; } switch (_inst) { diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index 1dac72dd6f6e..10dc4a6632fd 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -64,3 +64,59 @@ kvm_hypercall_start: /* Disable critical section. We are critical if \ shared->critical == r1 and r2 is always != r1 */ \ STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); + +.global kvm_emulate_mtmsrd +kvm_emulate_mtmsrd: + + SCRATCH_SAVE + + /* Put MSR & ~(MSR_EE|MSR_RI) in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + lis r30, (~(MSR_EE | MSR_RI))@h + ori r30, r30, (~(MSR_EE | MSR_RI))@l + and r31, r31, r30 + + /* OR the register's (MSR_EE|MSR_RI) on MSR */ +kvm_emulate_mtmsrd_reg: + andi. r30, r0, (MSR_EE|MSR_RI) + or r31, r31, r30 + + /* Put MSR back into magic page */ + STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Check if we have to fetch an interrupt */ + lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) + cmpwi r31, 0 + beq+ no_check + + /* Check if we may trigger an interrupt */ + andi. r30, r30, MSR_EE + beq no_check + + SCRATCH_RESTORE + + /* Nag hypervisor */ + tlbsync + + b kvm_emulate_mtmsrd_branch + +no_check: + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtmsrd_branch: + b . +kvm_emulate_mtmsrd_end: + +.global kvm_emulate_mtmsrd_branch_offs +kvm_emulate_mtmsrd_branch_offs: + .long (kvm_emulate_mtmsrd_branch - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_reg_offs +kvm_emulate_mtmsrd_reg_offs: + .long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_len +kvm_emulate_mtmsrd_len: + .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 -- cgit v1.2.2 From 7810927760a0d16d7a41be4dab895fbbf9445bc0 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:05 +0200 Subject: KVM: PPC: PV mtmsrd L=0 and mtmsr There is also a form of mtmsr where all bits need to be addressed. While the PPC64 Linux kernel behaves resonably well here, on PPC32 we do not have an L=1 form. It does mtmsr even for simple things like only changing EE. So we need to hook into that one as well and check for a mask of bits that we deem safe to change from within guest context. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 51 +++++++++++++++++++++++++ arch/powerpc/kernel/kvm_emul.S | 84 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 717ab0dded25..8ac57e2c52fa 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -63,7 +63,9 @@ #define KVM_INST_MTSPR_DSISR 0x7c1203a6 #define KVM_INST_TLBSYNC 0x7c00046c +#define KVM_INST_MTMSRD_L0 0x7c000164 #define KVM_INST_MTMSRD_L1 0x7c010164 +#define KVM_INST_MTMSR 0x7c000124 static bool kvm_patching_worked = true; static char kvm_tmp[1024 * 1024]; @@ -176,6 +178,49 @@ static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt) kvm_patch_ins_b(inst, distance_start); } +extern u32 kvm_emulate_mtmsr_branch_offs; +extern u32 kvm_emulate_mtmsr_reg1_offs; +extern u32 kvm_emulate_mtmsr_reg2_offs; +extern u32 kvm_emulate_mtmsr_reg3_offs; +extern u32 kvm_emulate_mtmsr_orig_ins_offs; +extern u32 kvm_emulate_mtmsr_len; +extern u32 kvm_emulate_mtmsr[]; + +static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtmsr_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsr_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtmsr, kvm_emulate_mtmsr_len * 4); + p[kvm_emulate_mtmsr_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_mtmsr_reg1_offs] |= rt; + p[kvm_emulate_mtmsr_reg2_offs] |= rt; + p[kvm_emulate_mtmsr_reg3_offs] |= rt; + p[kvm_emulate_mtmsr_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsr_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -256,6 +301,12 @@ static void kvm_check_ins(u32 *inst) if (get_rt(inst_rt) < 30) kvm_patch_ins_mtmsrd(inst, inst_rt); break; + case KVM_INST_MTMSR: + case KVM_INST_MTMSRD_L0: + /* We use r30 and r31 during the hook */ + if (get_rt(inst_rt) < 30) + kvm_patch_ins_mtmsr(inst, inst_rt); + break; } switch (_inst) { diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index 10dc4a6632fd..8cd22f47dd01 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -120,3 +120,87 @@ kvm_emulate_mtmsrd_reg_offs: .global kvm_emulate_mtmsrd_len kvm_emulate_mtmsrd_len: .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 + + +#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI) +#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS + +.global kvm_emulate_mtmsr +kvm_emulate_mtmsr: + + SCRATCH_SAVE + + /* Fetch old MSR in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Find the changed bits between old and new MSR */ +kvm_emulate_mtmsr_reg1: + xor r31, r0, r31 + + /* Check if we need to really do mtmsr */ + LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS) + and. r31, r31, r30 + + /* No critical bits changed? Maybe we can stay in the guest. */ + beq maybe_stay_in_guest + +do_mtmsr: + + SCRATCH_RESTORE + + /* Just fire off the mtmsr if it's critical */ +kvm_emulate_mtmsr_orig_ins: + mtmsr r0 + + b kvm_emulate_mtmsr_branch + +maybe_stay_in_guest: + + /* Check if we have to fetch an interrupt */ + lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) + cmpwi r31, 0 + beq+ no_mtmsr + + /* Check if we may trigger an interrupt */ +kvm_emulate_mtmsr_reg2: + andi. r31, r0, MSR_EE + beq no_mtmsr + + b do_mtmsr + +no_mtmsr: + + /* Put MSR into magic page because we don't call mtmsr */ +kvm_emulate_mtmsr_reg3: + STL64(r0, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtmsr_branch: + b . +kvm_emulate_mtmsr_end: + +.global kvm_emulate_mtmsr_branch_offs +kvm_emulate_mtmsr_branch_offs: + .long (kvm_emulate_mtmsr_branch - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg1_offs +kvm_emulate_mtmsr_reg1_offs: + .long (kvm_emulate_mtmsr_reg1 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg2_offs +kvm_emulate_mtmsr_reg2_offs: + .long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg3_offs +kvm_emulate_mtmsr_reg3_offs: + .long (kvm_emulate_mtmsr_reg3 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_orig_ins_offs +kvm_emulate_mtmsr_orig_ins_offs: + .long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_len +kvm_emulate_mtmsr_len: + .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4 -- cgit v1.2.2 From 644bfa013fd589b0df2470a66bcd104318ef24cd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:06 +0200 Subject: KVM: PPC: PV wrteei On BookE the preferred way to write the EE bit is the wrteei instruction. It already encodes the EE bit in the instruction. So in order to get BookE some speedups as well, let's also PV'nize thati instruction. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 50 ++++++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/kvm_emul.S | 41 ++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 8ac57e2c52fa..e93681753deb 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -67,6 +67,9 @@ #define KVM_INST_MTMSRD_L1 0x7c010164 #define KVM_INST_MTMSR 0x7c000124 +#define KVM_INST_WRTEEI_0 0x7c000146 +#define KVM_INST_WRTEEI_1 0x7c008146 + static bool kvm_patching_worked = true; static char kvm_tmp[1024 * 1024]; static int kvm_tmp_index; @@ -221,6 +224,47 @@ static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt) kvm_patch_ins_b(inst, distance_start); } +#ifdef CONFIG_BOOKE + +extern u32 kvm_emulate_wrteei_branch_offs; +extern u32 kvm_emulate_wrteei_ee_offs; +extern u32 kvm_emulate_wrteei_len; +extern u32 kvm_emulate_wrteei[]; + +static void kvm_patch_ins_wrteei(u32 *inst) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_wrteei_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_wrteei, kvm_emulate_wrteei_len * 4); + p[kvm_emulate_wrteei_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_wrteei_ee_offs] |= (*inst & MSR_EE); + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#endif + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -310,6 +354,12 @@ static void kvm_check_ins(u32 *inst) } switch (_inst) { +#ifdef CONFIG_BOOKE + case KVM_INST_WRTEEI_0: + case KVM_INST_WRTEEI_1: + kvm_patch_ins_wrteei(inst); + break; +#endif } } diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index 8cd22f47dd01..3199f65ede2c 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -204,3 +204,44 @@ kvm_emulate_mtmsr_orig_ins_offs: .global kvm_emulate_mtmsr_len kvm_emulate_mtmsr_len: .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4 + + + +.global kvm_emulate_wrteei +kvm_emulate_wrteei: + + SCRATCH_SAVE + + /* Fetch old MSR in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Remove MSR_EE from old MSR */ + li r30, 0 + ori r30, r30, MSR_EE + andc r31, r31, r30 + + /* OR new MSR_EE onto the old MSR */ +kvm_emulate_wrteei_ee: + ori r31, r31, 0 + + /* Write new MSR value back */ + STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_wrteei_branch: + b . +kvm_emulate_wrteei_end: + +.global kvm_emulate_wrteei_branch_offs +kvm_emulate_wrteei_branch_offs: + .long (kvm_emulate_wrteei_branch - kvm_emulate_wrteei) / 4 + +.global kvm_emulate_wrteei_ee_offs +kvm_emulate_wrteei_ee_offs: + .long (kvm_emulate_wrteei_ee - kvm_emulate_wrteei) / 4 + +.global kvm_emulate_wrteei_len +kvm_emulate_wrteei_len: + .long (kvm_emulate_wrteei_end - kvm_emulate_wrteei) / 4 -- cgit v1.2.2 From a58ddea556f8877ccf7caa046b6d6b32982f5b1d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 15:04:22 +0200 Subject: KVM: PPC: Move KVM trampolines before __end_interrupts When using a relocatable kernel we need to make sure that the trampline code and the interrupt handlers are both copied to low memory. The only way to do this reliably is to put them in the copied section. This patch should make relocated kernels work with KVM. KVM-Stable-Tag Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/exceptions-64s.S | 6 ++++++ arch/powerpc/kernel/head_64.S | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f53029a01554..1667a078b3e6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -299,6 +299,12 @@ slb_miss_user_pseries: b . /* prevent spec. execution */ #endif /* __DISABLED__ */ +/* KVM's trampoline code needs to be close to the interrupt handlers */ + +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#include "../kvm/book3s_rmhandlers.S" +#endif + .align 7 .globl __end_interrupts __end_interrupts: diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index c571cd3c1453..f0dd577e4a5b 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -166,12 +166,6 @@ exception_marker: #include "exceptions-64s.S" #endif -/* KVM trampoline code needs to be close to the interrupt handlers */ - -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#include "../kvm/book3s_rmhandlers.S" -#endif - _GLOBAL(generic_secondary_thread_init) mr r24,r3 -- cgit v1.2.2 From 989044ee0fdc6c22a11ea1d22e2a3d17463cb564 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 30 Aug 2010 12:01:56 +0200 Subject: KVM: PPC: Fix CONFIG_KVM_GUEST && !CONFIG_KVM case When CONFIG_KVM_GUEST is selected, but CONFIG_KVM is not, we were missing some defines in asm-offsets.c and included too many headers at other places. This patch makes above configuration work. Reported-by: Stephen Rothwell Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/asm-offsets.c | 6 +++--- arch/powerpc/kernel/kvm.c | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 37486cafb69d..6d92b4e13ebf 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -48,11 +48,11 @@ #ifdef CONFIG_PPC_ISERIES #include #endif -#ifdef CONFIG_KVM +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST) #include -#ifndef CONFIG_BOOKE -#include #endif +#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S) +#include #endif #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index e93681753deb..d3a2cc50d611 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -25,7 +25,6 @@ #include #include -#include #include #include #include -- cgit v1.2.2 From 7508e16c9f2a20f7721d7bc47c33a7b34c873a2c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 3 Aug 2010 11:32:56 +0200 Subject: KVM: PPC: Add feature bitmap for magic page We will soon add SR PV support to the shared page, so we need some infrastructure that allows the guest to query for features KVM exports. This patch adds a second return value to the magic mapping that indicated to the guest which features are available. Signed-off-by: Alexander Graf --- arch/powerpc/kernel/kvm.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index d3a2cc50d611..226882fe85a6 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -266,12 +266,20 @@ static void kvm_patch_ins_wrteei(u32 *inst) static void kvm_map_magic_page(void *data) { - kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, - KVM_MAGIC_PAGE, /* Physical Address */ - KVM_MAGIC_PAGE); /* Effective Address */ + u32 *features = data; + + ulong in[8]; + ulong out[8]; + + in[0] = KVM_MAGIC_PAGE; + in[1] = KVM_MAGIC_PAGE; + + kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE); + + *features = out[0]; } -static void kvm_check_ins(u32 *inst) +static void kvm_check_ins(u32 *inst, u32 features) { u32 _inst = *inst; u32 inst_no_rt = _inst & ~KVM_MASK_RT; @@ -367,9 +375,10 @@ static void kvm_use_magic_page(void) u32 *p; u32 *start, *end; u32 tmp; + u32 features; /* Tell the host to map the magic page to -4096 on all CPUs */ - on_each_cpu(kvm_map_magic_page, NULL, 1); + on_each_cpu(kvm_map_magic_page, &features, 1); /* Quick self-test to see if the mapping works */ if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) { @@ -382,7 +391,7 @@ static void kvm_use_magic_page(void) end = (void*)_etext; for (p = start; p < end; p++) - kvm_check_ins(p); + kvm_check_ins(p, features); printk(KERN_INFO "KVM: Live patching for a fast VM %s\n", kvm_patching_worked ? "worked" : "failed"); -- cgit v1.2.2 From cbe487fac7fc016dbabbcbe83f11384e1803a56d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 3 Aug 2010 10:39:35 +0200 Subject: KVM: PPC: Add mtsrin PV code This is the guest side of the mtsr acceleration. Using this a guest can now call mtsrin with almost no overhead as long as it ensures that it only uses it with (MSR_IR|MSR_DR) == 0. Linux does that, so we're good. Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/kvm.c | 60 +++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/kvm_emul.S | 50 ++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6d92b4e13ebf..7f0d6fcc28a3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -478,6 +478,7 @@ int main(void) DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared, critical)); + DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr)); #endif #ifdef CONFIG_44x diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 226882fe85a6..c8bab24ff8ac 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -42,6 +42,7 @@ #define KVM_INST_B_MAX 0x01ffffff #define KVM_MASK_RT 0x03e00000 +#define KVM_MASK_RB 0x0000f800 #define KVM_INST_MFMSR 0x7c0000a6 #define KVM_INST_MFSPR_SPRG0 0x7c1042a6 #define KVM_INST_MFSPR_SPRG1 0x7c1142a6 @@ -69,6 +70,8 @@ #define KVM_INST_WRTEEI_0 0x7c000146 #define KVM_INST_WRTEEI_1 0x7c008146 +#define KVM_INST_MTSRIN 0x7c0001e4 + static bool kvm_patching_worked = true; static char kvm_tmp[1024 * 1024]; static int kvm_tmp_index; @@ -264,6 +267,51 @@ static void kvm_patch_ins_wrteei(u32 *inst) #endif +#ifdef CONFIG_PPC_BOOK3S_32 + +extern u32 kvm_emulate_mtsrin_branch_offs; +extern u32 kvm_emulate_mtsrin_reg1_offs; +extern u32 kvm_emulate_mtsrin_reg2_offs; +extern u32 kvm_emulate_mtsrin_orig_ins_offs; +extern u32 kvm_emulate_mtsrin_len; +extern u32 kvm_emulate_mtsrin[]; + +static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtsrin_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtsrin_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtsrin, kvm_emulate_mtsrin_len * 4); + p[kvm_emulate_mtsrin_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_mtsrin_reg1_offs] |= (rb << 10); + p[kvm_emulate_mtsrin_reg2_offs] |= rt; + p[kvm_emulate_mtsrin_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtsrin_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#endif + static void kvm_map_magic_page(void *data) { u32 *features = data; @@ -360,6 +408,18 @@ static void kvm_check_ins(u32 *inst, u32 features) break; } + switch (inst_no_rt & ~KVM_MASK_RB) { +#ifdef CONFIG_PPC_BOOK3S_32 + case KVM_INST_MTSRIN: + if (features & KVM_MAGIC_FEAT_SR) { + u32 inst_rb = _inst & KVM_MASK_RB; + kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb); + } + break; + break; +#endif + } + switch (_inst) { #ifdef CONFIG_BOOKE case KVM_INST_WRTEEI_0: diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index 3199f65ede2c..a6e97e7a55e0 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -245,3 +245,53 @@ kvm_emulate_wrteei_ee_offs: .global kvm_emulate_wrteei_len kvm_emulate_wrteei_len: .long (kvm_emulate_wrteei_end - kvm_emulate_wrteei) / 4 + + +.global kvm_emulate_mtsrin +kvm_emulate_mtsrin: + + SCRATCH_SAVE + + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + andi. r31, r31, MSR_DR | MSR_IR + beq kvm_emulate_mtsrin_reg1 + + SCRATCH_RESTORE + +kvm_emulate_mtsrin_orig_ins: + nop + b kvm_emulate_mtsrin_branch + +kvm_emulate_mtsrin_reg1: + /* rX >> 26 */ + rlwinm r30,r0,6,26,29 + +kvm_emulate_mtsrin_reg2: + stw r0, (KVM_MAGIC_PAGE + KVM_MAGIC_SR)(r30) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtsrin_branch: + b . +kvm_emulate_mtsrin_end: + +.global kvm_emulate_mtsrin_branch_offs +kvm_emulate_mtsrin_branch_offs: + .long (kvm_emulate_mtsrin_branch - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_reg1_offs +kvm_emulate_mtsrin_reg1_offs: + .long (kvm_emulate_mtsrin_reg1 - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_reg2_offs +kvm_emulate_mtsrin_reg2_offs: + .long (kvm_emulate_mtsrin_reg2 - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_orig_ins_offs +kvm_emulate_mtsrin_orig_ins_offs: + .long (kvm_emulate_mtsrin_orig_ins - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_len +kvm_emulate_mtsrin_len: + .long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4 -- cgit v1.2.2 From 512ba59ed9c580b5e5575beda0041bb19a641127 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 5 Aug 2010 11:26:04 +0200 Subject: KVM: PPC: Make PV mtmsr work with r30 and r31 So far we've been restricting ourselves to r0-r29 as registers an mtmsr instruction could use. This was bad, as there are some code paths in Linux actually using r30. So let's instead handle all registers gracefully and get rid of that stupid limitation Signed-off-by: Alexander Graf --- arch/powerpc/kernel/kvm.c | 39 ++++++++++++++++++++++++++++++++------- arch/powerpc/kernel/kvm_emul.S | 17 ++++++++--------- 2 files changed, 40 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index c8bab24ff8ac..10b681c092ed 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -42,6 +42,7 @@ #define KVM_INST_B_MAX 0x01ffffff #define KVM_MASK_RT 0x03e00000 +#define KVM_RT_30 0x03c00000 #define KVM_MASK_RB 0x0000f800 #define KVM_INST_MFMSR 0x7c0000a6 #define KVM_INST_MFSPR_SPRG0 0x7c1042a6 @@ -82,6 +83,15 @@ static inline void kvm_patch_ins(u32 *inst, u32 new_inst) flush_icache_range((ulong)inst, (ulong)inst + 4); } +static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000fffc)); +#endif +} + static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt) { #ifdef CONFIG_64BIT @@ -186,7 +196,6 @@ static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt) extern u32 kvm_emulate_mtmsr_branch_offs; extern u32 kvm_emulate_mtmsr_reg1_offs; extern u32 kvm_emulate_mtmsr_reg2_offs; -extern u32 kvm_emulate_mtmsr_reg3_offs; extern u32 kvm_emulate_mtmsr_orig_ins_offs; extern u32 kvm_emulate_mtmsr_len; extern u32 kvm_emulate_mtmsr[]; @@ -216,9 +225,27 @@ static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt) /* Modify the chunk to fit the invocation */ memcpy(p, kvm_emulate_mtmsr, kvm_emulate_mtmsr_len * 4); p[kvm_emulate_mtmsr_branch_offs] |= distance_end & KVM_INST_B_MASK; - p[kvm_emulate_mtmsr_reg1_offs] |= rt; - p[kvm_emulate_mtmsr_reg2_offs] |= rt; - p[kvm_emulate_mtmsr_reg3_offs] |= rt; + + /* Make clobbered registers work too */ + switch (get_rt(rt)) { + case 30: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs], + magic_var(scratch2), KVM_RT_30); + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs], + magic_var(scratch2), KVM_RT_30); + break; + case 31: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs], + magic_var(scratch1), KVM_RT_30); + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs], + magic_var(scratch1), KVM_RT_30); + break; + default: + p[kvm_emulate_mtmsr_reg1_offs] |= rt; + p[kvm_emulate_mtmsr_reg2_offs] |= rt; + break; + } + p[kvm_emulate_mtmsr_orig_ins_offs] = *inst; flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsr_len * 4); @@ -402,9 +429,7 @@ static void kvm_check_ins(u32 *inst, u32 features) break; case KVM_INST_MTMSR: case KVM_INST_MTMSRD_L0: - /* We use r30 and r31 during the hook */ - if (get_rt(inst_rt) < 30) - kvm_patch_ins_mtmsr(inst, inst_rt); + kvm_patch_ins_mtmsr(inst, inst_rt); break; } diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index a6e97e7a55e0..65305325250b 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -135,7 +135,8 @@ kvm_emulate_mtmsr: /* Find the changed bits between old and new MSR */ kvm_emulate_mtmsr_reg1: - xor r31, r0, r31 + ori r30, r0, 0 + xor r31, r30, r31 /* Check if we need to really do mtmsr */ LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS) @@ -156,14 +157,17 @@ kvm_emulate_mtmsr_orig_ins: maybe_stay_in_guest: + /* Get the target register in r30 */ +kvm_emulate_mtmsr_reg2: + ori r30, r0, 0 + /* Check if we have to fetch an interrupt */ lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) cmpwi r31, 0 beq+ no_mtmsr /* Check if we may trigger an interrupt */ -kvm_emulate_mtmsr_reg2: - andi. r31, r0, MSR_EE + andi. r31, r30, MSR_EE beq no_mtmsr b do_mtmsr @@ -171,8 +175,7 @@ kvm_emulate_mtmsr_reg2: no_mtmsr: /* Put MSR into magic page because we don't call mtmsr */ -kvm_emulate_mtmsr_reg3: - STL64(r0, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) SCRATCH_RESTORE @@ -193,10 +196,6 @@ kvm_emulate_mtmsr_reg1_offs: kvm_emulate_mtmsr_reg2_offs: .long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4 -.global kvm_emulate_mtmsr_reg3_offs -kvm_emulate_mtmsr_reg3_offs: - .long (kvm_emulate_mtmsr_reg3 - kvm_emulate_mtmsr) / 4 - .global kvm_emulate_mtmsr_orig_ins_offs kvm_emulate_mtmsr_orig_ins_offs: .long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4 -- cgit v1.2.2 From df08bd10266ce6132278f6b4ddc4bb0a12330b56 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 5 Aug 2010 15:44:41 +0200 Subject: KVM: PPC: Make PV mtmsrd L=1 work with r30 and r31 We had an arbitrary limitation in mtmsrd L=1 that kept us from using r30 and r31 as input registers. Let's get rid of that and get more potential speedups! Signed-off-by: Alexander Graf --- arch/powerpc/kernel/kvm.c | 21 +++++++++++++++++---- arch/powerpc/kernel/kvm_emul.S | 8 +++++++- 2 files changed, 24 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 10b681c092ed..48a033865410 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -158,6 +158,7 @@ static u32 *kvm_alloc(int len) extern u32 kvm_emulate_mtmsrd_branch_offs; extern u32 kvm_emulate_mtmsrd_reg_offs; +extern u32 kvm_emulate_mtmsrd_orig_ins_offs; extern u32 kvm_emulate_mtmsrd_len; extern u32 kvm_emulate_mtmsrd[]; @@ -186,7 +187,21 @@ static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt) /* Modify the chunk to fit the invocation */ memcpy(p, kvm_emulate_mtmsrd, kvm_emulate_mtmsrd_len * 4); p[kvm_emulate_mtmsrd_branch_offs] |= distance_end & KVM_INST_B_MASK; - p[kvm_emulate_mtmsrd_reg_offs] |= rt; + switch (get_rt(rt)) { + case 30: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs], + magic_var(scratch2), KVM_RT_30); + break; + case 31: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs], + magic_var(scratch1), KVM_RT_30); + break; + default: + p[kvm_emulate_mtmsrd_reg_offs] |= rt; + break; + } + + p[kvm_emulate_mtmsrd_orig_ins_offs] = *inst; flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsrd_len * 4); /* Patch the invocation */ @@ -423,9 +438,7 @@ static void kvm_check_ins(u32 *inst, u32 features) /* Rewrites */ case KVM_INST_MTMSRD_L1: - /* We use r30 and r31 during the hook */ - if (get_rt(inst_rt) < 30) - kvm_patch_ins_mtmsrd(inst, inst_rt); + kvm_patch_ins_mtmsrd(inst, inst_rt); break; case KVM_INST_MTMSR: case KVM_INST_MTMSRD_L0: diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index 65305325250b..f2b1b2523e61 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -78,7 +78,8 @@ kvm_emulate_mtmsrd: /* OR the register's (MSR_EE|MSR_RI) on MSR */ kvm_emulate_mtmsrd_reg: - andi. r30, r0, (MSR_EE|MSR_RI) + ori r30, r0, 0 + andi. r30, r30, (MSR_EE|MSR_RI) or r31, r31, r30 /* Put MSR back into magic page */ @@ -96,6 +97,7 @@ kvm_emulate_mtmsrd_reg: SCRATCH_RESTORE /* Nag hypervisor */ +kvm_emulate_mtmsrd_orig_ins: tlbsync b kvm_emulate_mtmsrd_branch @@ -117,6 +119,10 @@ kvm_emulate_mtmsrd_branch_offs: kvm_emulate_mtmsrd_reg_offs: .long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4 +.global kvm_emulate_mtmsrd_orig_ins_offs +kvm_emulate_mtmsrd_orig_ins_offs: + .long (kvm_emulate_mtmsrd_orig_ins - kvm_emulate_mtmsrd) / 4 + .global kvm_emulate_mtmsrd_len kvm_emulate_mtmsrd_len: .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 -- cgit v1.2.2 From ad0873763a83e7b31ba87a85ec2027dd6a9d7b55 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 17 Aug 2010 11:41:44 +0200 Subject: KVM: PPC: Force enable nap on KVM There are some heuristics in the PPC power management code that try to find out if the particular hardware we're running on supports proper power management or just hangs the machine when going into nap mode. Since we know that KVM is safe with nap, let's force enable it in the PV code once we're certain that we are on a KVM VM. Signed-off-by: Alexander Graf --- arch/powerpc/kernel/kvm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 48a033865410..669d989be1d6 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -582,6 +582,9 @@ static int __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) kvm_use_magic_page(); + /* Enable napping */ + powersave_nap = 1; + free_tmp: kvm_free_tmp(); -- cgit v1.2.2 From 591bd8e7b4c8b9246d7a1c81ffbd28e35dc5de4e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 17 Aug 2010 22:08:39 +0200 Subject: KVM: PPC: Enable napping only for Book3s_64 Before I incorrectly enabled napping also for BookE, which would result in needless dcache flushes. Since we only need to force enable napping on Book3s_64 because it doesn't go into MSR_POW otherwise, we can just #ifdef that code to this particular platform. Reported-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/kvm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 669d989be1d6..428d0e538aec 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -582,8 +582,10 @@ static int __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) kvm_use_magic_page(); +#ifdef CONFIG_PPC_BOOK3S_64 /* Enable napping */ powersave_nap = 1; +#endif free_tmp: kvm_free_tmp(); -- cgit v1.2.2 From 732eacc0542d0aa48797f675888b85d6065af837 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Tue, 26 Oct 2010 14:22:23 -0700 Subject: replace nested max/min macros with {max,min}3 macro Use the new {max,min}3 macros to save some cycles and bytes on the stack. This patch substitutes trivial nested macros with their counterpart. Signed-off-by: Hagen Paul Pfeifer Cc: Joe Perches Cc: Ingo Molnar Cc: Hartley Sweeten Cc: Russell King Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Herbert Xu Cc: Roland Dreier Cc: Sean Hefty Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/vio.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index d692989a4318..441d2a722f06 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -238,9 +238,7 @@ static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size) * memory in this pool does not change. */ if (spare_needed && reserve_freed) { - tmp = min(spare_needed, min(reserve_freed, - (viodev->cmo.entitled - - VIO_CMO_MIN_ENT))); + tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT)); vio_cmo.spare += tmp; viodev->cmo.entitled -= tmp; -- cgit v1.2.2 From 9b05a69e0534ec70bc94921936ffa05b330507cb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:47 -0700 Subject: ptrace: change signature of arch_ptrace() Fix up the arguments to arch_ptrace() to take account of the fact that @addr and @data are now unsigned long rather than long as of a preceding patch in this series. Signed-off-by: Namhyung Kim Cc: Acked-by: Roland McGrath Acked-by: David Howells Acked-by: Geert Uytterhoeven Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/ptrace.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 286d9783d93f..136763568a7b 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1406,8 +1406,8 @@ static long ppc_del_hwdebug(struct task_struct *child, long addr, long data) * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, * we mark them as obsolete now, they will be removed in a future version */ -static long arch_ptrace_old(struct task_struct *child, long request, long addr, - long data) +static long arch_ptrace_old(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { switch (request) { case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ @@ -1434,7 +1434,8 @@ static long arch_ptrace_old(struct task_struct *child, long request, long addr, return -EPERM; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret = -EPERM; @@ -1446,11 +1447,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 - index = (unsigned long) addr >> 2; + index = addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else - index = (unsigned long) addr >> 3; + index = addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; @@ -1474,11 +1475,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 - index = (unsigned long) addr >> 2; + index = addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else - index = (unsigned long) addr >> 3; + index = addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; -- cgit v1.2.2 From f68d2048206389603d646b06e3cc16f1bbc3ff88 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:01 -0700 Subject: ptrace: cleanup arch_ptrace() on powerpc Use new 'datavp' and 'datalp' variables in order to remove unnecessary castings. Signed-off-by: Namhyung Kim Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/ptrace.c | 51 ++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 25 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 136763568a7b..a9b32967cff6 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1409,26 +1409,28 @@ static long ppc_del_hwdebug(struct task_struct *child, long addr, long data) static long arch_ptrace_old(struct task_struct *child, long request, unsigned long addr, unsigned long data) { + void __user *datavp = (void __user *) data; + switch (request) { case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_GPR, 0, 32 * sizeof(long), - (void __user *) data); + datavp); case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_GPR, 0, 32 * sizeof(long), - (const void __user *) data); + datavp); case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_FPR, 0, 32 * sizeof(double), - (void __user *) data); + datavp); case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_FPR, 0, 32 * sizeof(double), - (const void __user *) data); + datavp); } return -EPERM; @@ -1438,6 +1440,8 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret = -EPERM; + void __user *datavp = (void __user *) data; + unsigned long __user *datalp = datavp; switch (request) { /* read the word at location addr in the USER area. */ @@ -1464,7 +1468,7 @@ long arch_ptrace(struct task_struct *child, long request, tmp = ((unsigned long *)child->thread.fpr) [TS_FPRWIDTH * (index - PT_FPR0)]; } - ret = put_user(tmp,(unsigned long __user *) data); + ret = put_user(tmp, datalp); break; } @@ -1526,11 +1530,11 @@ long arch_ptrace(struct task_struct *child, long request, dbginfo.features = 0; #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - if (!access_ok(VERIFY_WRITE, data, + if (!access_ok(VERIFY_WRITE, datavp, sizeof(struct ppc_debug_info))) return -EFAULT; - ret = __copy_to_user((struct ppc_debug_info __user *)data, - &dbginfo, sizeof(struct ppc_debug_info)) ? + ret = __copy_to_user(datavp, &dbginfo, + sizeof(struct ppc_debug_info)) ? -EFAULT : 0; break; } @@ -1538,11 +1542,10 @@ long arch_ptrace(struct task_struct *child, long request, case PPC_PTRACE_SETHWDEBUG: { struct ppc_hw_breakpoint bp_info; - if (!access_ok(VERIFY_READ, data, + if (!access_ok(VERIFY_READ, datavp, sizeof(struct ppc_hw_breakpoint))) return -EFAULT; - ret = __copy_from_user(&bp_info, - (struct ppc_hw_breakpoint __user *)data, + ret = __copy_from_user(&bp_info, datavp, sizeof(struct ppc_hw_breakpoint)) ? -EFAULT : 0; if (!ret) @@ -1561,11 +1564,9 @@ long arch_ptrace(struct task_struct *child, long request, if (addr > 0) break; #ifdef CONFIG_PPC_ADV_DEBUG_REGS - ret = put_user(child->thread.dac1, - (unsigned long __user *)data); + ret = put_user(child->thread.dac1, datalp); #else - ret = put_user(child->thread.dabr, - (unsigned long __user *)data); + ret = put_user(child->thread.dabr, datalp); #endif break; } @@ -1581,7 +1582,7 @@ long arch_ptrace(struct task_struct *child, long request, return copy_regset_to_user(child, &user_ppc_native_view, REGSET_GPR, 0, sizeof(struct pt_regs), - (void __user *) data); + datavp); #ifdef CONFIG_PPC64 case PTRACE_SETREGS64: @@ -1590,19 +1591,19 @@ long arch_ptrace(struct task_struct *child, long request, return copy_regset_from_user(child, &user_ppc_native_view, REGSET_GPR, 0, sizeof(struct pt_regs), - (const void __user *) data); + datavp); case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_FPR, 0, sizeof(elf_fpregset_t), - (void __user *) data); + datavp); case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_FPR, 0, sizeof(elf_fpregset_t), - (const void __user *) data); + datavp); #ifdef CONFIG_ALTIVEC case PTRACE_GETVRREGS: @@ -1610,40 +1611,40 @@ long arch_ptrace(struct task_struct *child, long request, REGSET_VMX, 0, (33 * sizeof(vector128) + sizeof(u32)), - (void __user *) data); + datavp); case PTRACE_SETVRREGS: return copy_regset_from_user(child, &user_ppc_native_view, REGSET_VMX, 0, (33 * sizeof(vector128) + sizeof(u32)), - (const void __user *) data); + datavp); #endif #ifdef CONFIG_VSX case PTRACE_GETVSRREGS: return copy_regset_to_user(child, &user_ppc_native_view, REGSET_VSX, 0, 32 * sizeof(double), - (void __user *) data); + datavp); case PTRACE_SETVSRREGS: return copy_regset_from_user(child, &user_ppc_native_view, REGSET_VSX, 0, 32 * sizeof(double), - (const void __user *) data); + datavp); #endif #ifdef CONFIG_SPE case PTRACE_GETEVRREGS: /* Get the child spe register state. */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_SPE, 0, 35 * sizeof(u32), - (void __user *) data); + datavp); case PTRACE_SETEVRREGS: /* Set the child spe register state. */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_SPE, 0, 35 * sizeof(u32), - (const void __user *) data); + datavp); #endif /* Old reverse args ptrace callss */ -- cgit v1.2.2 From ff10b88b5a05c8f1646dd15fb9f6093c1384ff6d Mon Sep 17 00:00:00 2001 From: Dongdong Deng Date: Wed, 27 Oct 2010 21:47:00 -0500 Subject: kgdb,ppc: Individual register get/set for ppc commit 534af1082329392bc29f6badf815e69ae2ae0f4c(kgdb,kdb: individual register set and and get API) introduce dbg_get_reg/dbg_set_reg API for individual register get and set. This patch implement those APIs for ppc. Signed-off-by: Dongdong Deng Signed-off-by: Jason Wessel --- arch/powerpc/kernel/kgdb.c | 188 ++++++++++++++++++++++++++++++--------------- 1 file changed, 125 insertions(+), 63 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7f61a3ac787c..7a9db64f3f04 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -194,40 +194,6 @@ static int kgdb_dabr_match(struct pt_regs *regs) ptr = (unsigned long *)ptr32; \ } while (0) - -void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) -{ - unsigned long *ptr = gdb_regs; - int reg; - - memset(gdb_regs, 0, NUMREGBYTES); - - for (reg = 0; reg < 32; reg++) - PACK64(ptr, regs->gpr[reg]); - -#ifdef CONFIG_FSL_BOOKE -#ifdef CONFIG_SPE - for (reg = 0; reg < 32; reg++) - PACK64(ptr, current->thread.evr[reg]); -#else - ptr += 32; -#endif -#else - /* fp registers not used by kernel, leave zero */ - ptr += 32 * 8 / sizeof(long); -#endif - - PACK64(ptr, regs->nip); - PACK64(ptr, regs->msr); - PACK32(ptr, regs->ccr); - PACK64(ptr, regs->link); - PACK64(ptr, regs->ctr); - PACK32(ptr, regs->xer); - - BUG_ON((unsigned long)ptr > - (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); -} - void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) { struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + @@ -271,44 +237,140 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); } -#define UNPACK64(dest, ptr) do { dest = *(ptr++); } while (0) +#define GDB_SIZEOF_REG sizeof(unsigned long) +#define GDB_SIZEOF_REG_U32 sizeof(u32) -#define UNPACK32(dest, ptr) do { \ - u32 *ptr32; \ - ptr32 = (u32 *)ptr; \ - dest = *(ptr32++); \ - ptr = (unsigned long *)ptr32; \ - } while (0) +#ifdef CONFIG_FSL_BOOKE +#define GDB_SIZEOF_FLOAT_REG sizeof(unsigned long) +#else +#define GDB_SIZEOF_FLOAT_REG sizeof(u64) +#endif -void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { - unsigned long *ptr = gdb_regs; - int reg; - - for (reg = 0; reg < 32; reg++) - UNPACK64(regs->gpr[reg], ptr); + { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[0]) }, + { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[1]) }, + { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[2]) }, + { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[3]) }, + { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[4]) }, + { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[5]) }, + { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[6]) }, + { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[7]) }, + { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[8]) }, + { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[9]) }, + { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[10]) }, + { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[11]) }, + { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[12]) }, + { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[13]) }, + { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[14]) }, + { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[15]) }, + { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[16]) }, + { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[17]) }, + { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[18]) }, + { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[19]) }, + { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[20]) }, + { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[21]) }, + { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[22]) }, + { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[23]) }, + { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[24]) }, + { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[25]) }, + { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[26]) }, + { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[27]) }, + { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[28]) }, + { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[29]) }, + { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[30]) }, + { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[31]) }, + + { "f0", GDB_SIZEOF_FLOAT_REG, 0 }, + { "f1", GDB_SIZEOF_FLOAT_REG, 1 }, + { "f2", GDB_SIZEOF_FLOAT_REG, 2 }, + { "f3", GDB_SIZEOF_FLOAT_REG, 3 }, + { "f4", GDB_SIZEOF_FLOAT_REG, 4 }, + { "f5", GDB_SIZEOF_FLOAT_REG, 5 }, + { "f6", GDB_SIZEOF_FLOAT_REG, 6 }, + { "f7", GDB_SIZEOF_FLOAT_REG, 7 }, + { "f8", GDB_SIZEOF_FLOAT_REG, 8 }, + { "f9", GDB_SIZEOF_FLOAT_REG, 9 }, + { "f10", GDB_SIZEOF_FLOAT_REG, 10 }, + { "f11", GDB_SIZEOF_FLOAT_REG, 11 }, + { "f12", GDB_SIZEOF_FLOAT_REG, 12 }, + { "f13", GDB_SIZEOF_FLOAT_REG, 13 }, + { "f14", GDB_SIZEOF_FLOAT_REG, 14 }, + { "f15", GDB_SIZEOF_FLOAT_REG, 15 }, + { "f16", GDB_SIZEOF_FLOAT_REG, 16 }, + { "f17", GDB_SIZEOF_FLOAT_REG, 17 }, + { "f18", GDB_SIZEOF_FLOAT_REG, 18 }, + { "f19", GDB_SIZEOF_FLOAT_REG, 19 }, + { "f20", GDB_SIZEOF_FLOAT_REG, 20 }, + { "f21", GDB_SIZEOF_FLOAT_REG, 21 }, + { "f22", GDB_SIZEOF_FLOAT_REG, 22 }, + { "f23", GDB_SIZEOF_FLOAT_REG, 23 }, + { "f24", GDB_SIZEOF_FLOAT_REG, 24 }, + { "f25", GDB_SIZEOF_FLOAT_REG, 25 }, + { "f26", GDB_SIZEOF_FLOAT_REG, 26 }, + { "f27", GDB_SIZEOF_FLOAT_REG, 27 }, + { "f28", GDB_SIZEOF_FLOAT_REG, 28 }, + { "f29", GDB_SIZEOF_FLOAT_REG, 29 }, + { "f30", GDB_SIZEOF_FLOAT_REG, 30 }, + { "f31", GDB_SIZEOF_FLOAT_REG, 31 }, + + { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, nip) }, + { "msr", GDB_SIZEOF_REG, offsetof(struct pt_regs, msr) }, + { "cr", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, ccr) }, + { "lr", GDB_SIZEOF_REG, offsetof(struct pt_regs, link) }, + { "ctr", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, ctr) }, + { "xer", GDB_SIZEOF_REG, offsetof(struct pt_regs, xer) }, +}; -#ifdef CONFIG_FSL_BOOKE -#ifdef CONFIG_SPE - for (reg = 0; reg < 32; reg++) - UNPACK64(current->thread.evr[reg], ptr); +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (regno < 32 || regno >= 64) + /* First 0 -> 31 gpr registers*/ + /* pc, msr, ls... registers 64 -> 69 */ + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + + if (regno >= 32 && regno < 64) { + /* FP registers 32 -> 63 */ +#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) + if (current) + memcpy(mem, current->thread.evr[regno-32], + dbg_reg_def[regno].size); #else - ptr += 32; + /* fp registers not used by kernel, leave zero */ + memset(mem, 0, dbg_reg_def[regno].size); #endif + } + + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (regno < 32 || regno >= 64) + /* First 0 -> 31 gpr registers*/ + /* pc, msr, ls... registers 64 -> 69 */ + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + + if (regno >= 32 && regno < 64) { + /* FP registers 32 -> 63 */ +#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) + memcpy(current->thread.evr[regno-32], mem, + dbg_reg_def[regno].size); #else - /* fp registers not used by kernel, leave zero */ - ptr += 32 * 8 / sizeof(int); + /* fp registers not used by kernel, leave zero */ + return 0; #endif + } - UNPACK64(regs->nip, ptr); - UNPACK64(regs->msr, ptr); - UNPACK32(regs->ccr, ptr); - UNPACK64(regs->link, ptr); - UNPACK64(regs->ctr, ptr); - UNPACK32(regs->xer, ptr); - - BUG_ON((unsigned long)ptr > - (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); + return 0; } void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) -- cgit v1.2.2 From 4b6ba8aacbb3185703b797286547d0f8f3859b02 Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 26 Oct 2010 15:07:13 -0700 Subject: of/net: Move of_get_mac_address() to a common source file. There are two identical implementations of of_get_mac_address(), one each in arch/powerpc/kernel/prom_parse.c and arch/microblaze/kernel/prom_parse.c. Move this function to a new common file of_net.{c,h} and adjust all the callers to include the new header. Signed-off-by: David Daney [grant.likely@secretlab.ca: protect header with #ifdef] Signed-off-by: Grant Likely --- arch/powerpc/kernel/prom_parse.c | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c index 88334af038e5..c2b7a07cc3d3 100644 --- a/arch/powerpc/kernel/prom_parse.c +++ b/arch/powerpc/kernel/prom_parse.c @@ -117,41 +117,3 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, cells = prop ? *(u32 *)prop : of_n_size_cells(dn); *size = of_read_number(dma_window, cells); } - -/** - * Search the device tree for the best MAC address to use. 'mac-address' is - * checked first, because that is supposed to contain to "most recent" MAC - * address. If that isn't set, then 'local-mac-address' is checked next, - * because that is the default address. If that isn't set, then the obsolete - * 'address' is checked, just in case we're using an old device tree. - * - * Note that the 'address' property is supposed to contain a virtual address of - * the register set, but some DTS files have redefined that property to be the - * MAC address. - * - * All-zero MAC addresses are rejected, because those could be properties that - * exist in the device tree, but were not set by U-Boot. For example, the - * DTS could define 'mac-address' and 'local-mac-address', with zero MAC - * addresses. Some older U-Boots only initialized 'local-mac-address'. In - * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists - * but is all zeros. -*/ -const void *of_get_mac_address(struct device_node *np) -{ - struct property *pp; - - pp = of_find_property(np, "mac-address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - pp = of_find_property(np, "local-mac-address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - pp = of_find_property(np, "address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - return NULL; -} -EXPORT_SYMBOL(of_get_mac_address); -- cgit v1.2.2 From a36be1003a80197714fc2b6e198df2f31f9eb270 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 18 Oct 2010 17:35:48 -0500 Subject: PPC: KVM: Book E doesn't have __end_interrupts. Fix an unresolved symbol with CONFIG_KVM_GUEST plus CONFIG_RELOCATABLE on Book E. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 428d0e538aec..b06bdae04064 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -127,7 +127,7 @@ static void kvm_patch_ins_nop(u32 *inst) static void kvm_patch_ins_b(u32 *inst, int addr) { -#ifdef CONFIG_RELOCATABLE +#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S) /* On relocatable kernels interrupts handlers and our code can be in different regions, so we don't patch them */ -- cgit v1.2.2 From 451a3c24b0135bce54542009b5fde43846c7cf67 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Nov 2010 16:26:55 +0100 Subject: BKL: remove extraneous #include The big kernel lock has been removed from all these files at some point, leaving only the #include. Remove this too as a cleanup. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/sys_ppc32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index b1b6043a56c4..4e5bf1edc0f2 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.2 From e3839ed8e89e79202c0402ac46965c0686897890 Mon Sep 17 00:00:00 2001 From: Dongdong Deng Date: Tue, 16 Nov 2010 16:02:00 -0600 Subject: kgdb,ppc: Fix regression in evr register handling Commit ff10b88b5a05c8f1646dd15fb9f6093c1384ff6d (kgdb,ppc: Individual register get/set for ppc) introduced a problem where memcpy was used incorrectly to read and write the evr registers with a kernel that has: CONFIG_FSL_BOOKE=y CONFIG_SPE=y CONFIG_KGDB=y This patch also fixes the following compilation problems: arch/powerpc/kernel/kgdb.c: In function 'dbg_get_reg': arch/powerpc/kernel/kgdb.c:341: error: passing argument 2 of 'memcpy' makes pointer from integer without a cast arch/powerpc/kernel/kgdb.c: In function 'dbg_set_reg': arch/powerpc/kernel/kgdb.c:366: error: passing argument 1 of 'memcpy' makes pointer from integer without a cast [jason.wessel@windriver.com: Remove void * casts and fix patch header] Reported-by: Milton Miller Signed-off-by: Dongdong Deng Acked-by: Kumar Gala Signed-off-by: Jason Wessel CC: linuxppc-dev@lists.ozlabs.org --- arch/powerpc/kernel/kgdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7a9db64f3f04..42850ee00ada 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -337,7 +337,7 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) /* FP registers 32 -> 63 */ #if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) if (current) - memcpy(mem, current->thread.evr[regno-32], + memcpy(mem, ¤t->thread.evr[regno-32], dbg_reg_def[regno].size); #else /* fp registers not used by kernel, leave zero */ @@ -362,7 +362,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) if (regno >= 32 && regno < 64) { /* FP registers 32 -> 63 */ #if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) - memcpy(current->thread.evr[regno-32], mem, + memcpy(¤t->thread.evr[regno-32], mem, dbg_reg_def[regno].size); #else /* fp registers not used by kernel, leave zero */ -- cgit v1.2.2 From 0f6b77ca12bea571e0a97b0588f62aa5f6012d61 Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Tue, 16 Nov 2010 07:55:16 +0000 Subject: powerpc: Update a BKL related comment The commit 5e3d20a remove bkl from startup code so setup_arch() it isn't called with bkl held anymore. Update the comment on top of that function. Fix also a typo. This work was supported by a hardware donation from the CE Linux Forum. Signed-off-by: Alessio Igor Bogani Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2a178b0ebcdf..ce6f61c6f871 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -497,9 +497,8 @@ static void __init emergency_stack_init(void) } /* - * Called into from start_kernel, after lock_kernel has been called. - * Initializes bootmem, which is unsed to manage page allocation until - * mem_init is called. + * Called into from start_kernel this initializes bootmem, which is used + * to manage page allocation until mem_init is called. */ void __init setup_arch(char **cmdline_p) { -- cgit v1.2.2 From 004417a6d468e24399e383645c068b498eed84ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Nov 2010 18:38:29 +0100 Subject: perf, arch: Cleanup perf-pmu init vs lockup-detector The perf hardware pmu got initialized at various points in the boot, some before early_initcall() some after (notably arch_initcall). The problem is that the NMI lockup detector is ran from early_initcall() and expects the hardware pmu to be present. Sanitize this by moving all architecture hardware pmu implementations to initialize at early_initcall() and move the lockup detector to an explicit initcall right after that. Cc: paulus Cc: davem Cc: Michael Cree Cc: Deng-Cheng Zhu Acked-by: Paul Mundt Acked-by: Will Deacon Signed-off-by: Peter Zijlstra LKML-Reference: <1290707759.2145.119.camel@laptop> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/e500-pmu.c | 2 +- arch/powerpc/kernel/mpc7450-pmu.c | 2 +- arch/powerpc/kernel/power4-pmu.c | 2 +- arch/powerpc/kernel/power5+-pmu.c | 2 +- arch/powerpc/kernel/power5-pmu.c | 2 +- arch/powerpc/kernel/power6-pmu.c | 2 +- arch/powerpc/kernel/power7-pmu.c | 2 +- arch/powerpc/kernel/ppc970-pmu.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c index 7c07de0d8943..b150b510510f 100644 --- a/arch/powerpc/kernel/e500-pmu.c +++ b/arch/powerpc/kernel/e500-pmu.c @@ -126,4 +126,4 @@ static int init_e500_pmu(void) return register_fsl_emb_pmu(&e500_pmu); } -arch_initcall(init_e500_pmu); +early_initcall(init_e500_pmu); diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c index 09d72028f317..2cc5e0301d0b 100644 --- a/arch/powerpc/kernel/mpc7450-pmu.c +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void) return register_power_pmu(&mpc7450_pmu); } -arch_initcall(init_mpc7450_pmu); +early_initcall(init_mpc7450_pmu); diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 2a361cdda635..ead8b3c2649e 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -613,4 +613,4 @@ static int init_power4_pmu(void) return register_power_pmu(&power4_pmu); } -arch_initcall(init_power4_pmu); +early_initcall(init_power4_pmu); diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 199de527d411..eca0ac595cb6 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -682,4 +682,4 @@ static int init_power5p_pmu(void) return register_power_pmu(&power5p_pmu); } -arch_initcall(init_power5p_pmu); +early_initcall(init_power5p_pmu); diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 98b6a729a9dd..d5ff0f64a5e6 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -621,4 +621,4 @@ static int init_power5_pmu(void) return register_power_pmu(&power5_pmu); } -arch_initcall(init_power5_pmu); +early_initcall(init_power5_pmu); diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 84a607bda8fb..31603927e376 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -544,4 +544,4 @@ static int init_power6_pmu(void) return register_power_pmu(&power6_pmu); } -arch_initcall(init_power6_pmu); +early_initcall(init_power6_pmu); diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 852f7b7f6b40..593740fcb799 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -369,4 +369,4 @@ static int init_power7_pmu(void) return register_power_pmu(&power7_pmu); } -arch_initcall(init_power7_pmu); +early_initcall(init_power7_pmu); diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 3fee685de4df..9a6e093858fe 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -494,4 +494,4 @@ static int init_ppc970_pmu(void) return register_power_pmu(&ppc970_pmu); } -arch_initcall(init_ppc970_pmu); +early_initcall(init_ppc970_pmu); -- cgit v1.2.2 From 64ff31287693c1f325cb9cb049569c1611438ef1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 12 Aug 2010 16:28:09 +0000 Subject: powerpc: Add support for popcnt instructions POWER5 added popcntb, and POWER7 added popcntw and popcntd. As a first step this patch does all the work out of line, but it would be nice to implement them as inlines with an out of line fallback. The performance issue with hweight was noticed when disabling SMT on a large (192 thread) POWER7 box. The patch improves that testcase by about 8%. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ppc_ksyms.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ab3e392ac63c..ef3ef566235e 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -186,3 +186,10 @@ EXPORT_SYMBOL(__mtdcr); EXPORT_SYMBOL(__mfdcr); #endif EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__arch_hweight16); +EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__arch_hweight64); +#endif -- cgit v1.2.2 From d72e063bb32c06c6c1cec14f6857b7c37ba62d7a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 24 Aug 2010 14:23:44 +0000 Subject: powerpc/kdump: Override crash_free_reserved_phys_range to avoid freeing RTAS The crashkernel region will almost always overlap RTAS. If we free the crashkernel region via "echo 0 > /sys/kernel/kexec_crash_size" then we will free RTAS and the machine will crash in confusing and exciting ways. Override crash_free_reserved_phys_range and check for overlap with RTAS. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash_dump.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 8e05c16344e4..0a2af50243cb 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef DEBUG #include @@ -141,3 +142,35 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, return csize; } + +#ifdef CONFIG_PPC_RTAS +/* + * The crashkernel region will almost always overlap the RTAS region, so + * we have to be careful when shrinking the crashkernel region. + */ +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) +{ + unsigned long addr; + const u32 *basep, *sizep; + unsigned int rtas_start = 0, rtas_end = 0; + + basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); + sizep = of_get_property(rtas.dev, "rtas-size", NULL); + + if (basep && sizep) { + rtas_start = *basep; + rtas_end = *basep + *sizep; + } + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + /* Does this page overlap with the RTAS region? */ + if (addr <= rtas_end && ((addr + PAGE_SIZE) > rtas_start)) + continue; + + ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); + init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); + free_page((unsigned long)__va(addr)); + totalram_pages++; + } +} +#endif -- cgit v1.2.2 From 99d86705253dcf728dbbec4d694a6764b6edb70c Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Wed, 6 Oct 2010 08:36:59 +0000 Subject: powerpc: Cleanup APIs for cpu/thread/core mappings These APIs take logical cpu number as input Change cpu_first_thread_in_core() to cpu_first_thread_sibling() Change cpu_last_thread_in_core() to cpu_last_thread_sibling() These APIs convert core number (index) to logical cpu/thread numbers Add cpu_first_thread_of_core(int core) Changed cpu_thread_to_core() to cpu_core_index_of_thread(int cpu) The goal is to make 'threads_per_core' accessible to the pseries_energy module. Instead of making an API to read threads_per_core, this is a higher level wrapper function to convert from logical cpu number to core number. The current APIs cpu_first_thread_in_core() and cpu_last_thread_in_core() returns logical CPU number while cpu_thread_to_core() returns core number or index which is not a logical CPU number. The new APIs are now clearly named to distinguish 'core number' versus first and last 'logical cpu number' in that core. The new APIs cpu_{first,last}_thread_sibling() work on logical cpu numbers. While cpu_first_thread_of_core() and cpu_core_index_of_thread() work on core index. Example usage: (4 threads per core system) cpu_first_thread_sibling(5) = 4 cpu_last_thread_sibling(5) = 7 cpu_core_index_of_thread(5) = 1 cpu_first_thread_of_core(1) = 4 cpu_core_index_of_thread() is used in cpu_to_drc_index() in the module and cpu_first_thread_of_core() is used in drc_index_to_cpu() in the module. Make API changes to few callers. Export symbols for use in modules. Signed-off-by: Vaidyanathan Srinivasan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 68034bbf2e4f..981360509172 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -466,7 +466,20 @@ out: return id; } -/* Must be called when no change can occur to cpu_present_mask, +/* Helper routines for cpu to core mapping */ +int cpu_core_index_of_thread(int cpu) +{ + return cpu >> threads_shift; +} +EXPORT_SYMBOL_GPL(cpu_core_index_of_thread); + +int cpu_first_thread_of_core(int core) +{ + return core << threads_shift; +} +EXPORT_SYMBOL_GPL(cpu_first_thread_of_core); + +/* Must be called when no change can occur to cpu_present_map, * i.e. during cpu online or offline. */ static struct device_node *cpu_to_l2cache(int cpu) @@ -514,7 +527,7 @@ int __devinit start_secondary(void *unused) notify_cpu_starting(cpu); set_cpu_online(cpu, true); /* Update sibling maps */ - base = cpu_first_thread_in_core(cpu); + base = cpu_first_thread_sibling(cpu); for (i = 0; i < threads_per_core; i++) { if (cpu_is_offline(base + i)) continue; @@ -600,7 +613,7 @@ int __cpu_disable(void) return err; /* Update sibling maps */ - base = cpu_first_thread_in_core(cpu); + base = cpu_first_thread_sibling(cpu); for (i = 0; i < threads_per_core; i++) { cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i)); cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu)); -- cgit v1.2.2 From 6d283d782f9fbafee5c672bfdaff4c10f6fdc788 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Mon, 18 Oct 2010 07:26:59 +0000 Subject: powerpc/vio: Use dma ops helpers Use the set_dma_ops helper. Instead of modifying vio_dma_mapping_ops, just create a trivial wrapper for dma_supported. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 441d2a722f06..b2654058f2e4 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -600,6 +600,11 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, vio_cmo_dealloc(viodev, alloc_size); } +static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask) +{ + return dma_iommu_ops.dma_supported(dev, mask); +} + struct dma_map_ops vio_dma_mapping_ops = { .alloc_coherent = vio_dma_iommu_alloc_coherent, .free_coherent = vio_dma_iommu_free_coherent, @@ -607,6 +612,7 @@ struct dma_map_ops vio_dma_mapping_ops = { .unmap_sg = vio_dma_iommu_unmap_sg, .map_page = vio_dma_iommu_map_page, .unmap_page = vio_dma_iommu_unmap_page, + .dma_supported = vio_dma_iommu_dma_supported, }; @@ -858,8 +864,7 @@ static void vio_cmo_bus_remove(struct vio_dev *viodev) static void vio_cmo_set_dma_ops(struct vio_dev *viodev) { - vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported; - viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops; + set_dma_ops(&viodev->dev, &vio_dma_mapping_ops); } /** @@ -1244,7 +1249,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) if (firmware_has_feature(FW_FEATURE_CMO)) vio_cmo_set_dma_ops(viodev); else - viodev->dev.archdata.dma_ops = &dma_iommu_ops; + set_dma_ops(&viodev->dev, &dma_iommu_ops); set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev)); set_dev_node(&viodev->dev, of_node_to_nid(of_node)); -- cgit v1.2.2 From 698193d85aa8aba003bacd09e40bbd78474fb00f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 8 Nov 2010 17:31:36 +0000 Subject: powerpc: Consolidate obj-y assignments No need to have three of them. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/Makefile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 36c30f31ec93..3bb2a3e6a337 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -29,8 +29,10 @@ endif obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ init_task.o process.o systbl.o idle.o \ - signal.o sysfs.o cacheinfo.o -obj-y += vdso32/ + signal.o sysfs.o cacheinfo.o time.o \ + prom.o traps.o setup-common.o \ + udbg.o misc.o io.o dma.o \ + misc_$(CONFIG_WORD_SIZE).o vdso32/ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o @@ -80,9 +82,6 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds -obj-y += time.o prom.o traps.o setup-common.o \ - udbg.o misc.o io.o dma.o \ - misc_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o -- cgit v1.2.2 From 93fe56e99fb946fcd4244741bfc7af6638f1cac7 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 17 Nov 2010 18:52:42 +0000 Subject: powerpc: Remove unneeded cpu_setup/restore from POWER7 cputable entry These are not needed so just remove them Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cputable.c | 2 -- arch/powerpc/kernel/misc.S | 5 ----- 2 files changed, 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 96a908f1cd87..75062cb40e7b 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -457,8 +457,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_power7, - .cpu_restore = __restore_cpu_power7, .oprofile_cpu_type = "ppc64/power7", .oprofile_type = PPC_OPROFILE_POWER4, .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV, diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 2d29752cbe16..b69463ec2010 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -122,8 +122,3 @@ _GLOBAL(longjmp) mtlr r0 mr r3,r4 blr - -_GLOBAL(__setup_cpu_power7) -_GLOBAL(__restore_cpu_power7) - /* place holder */ - blr -- cgit v1.2.2 From 1d32bb1827da3ebb413f6cb492990a42ab030559 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 17 Nov 2010 18:52:43 +0000 Subject: powerpc: Remove POWER6 oprofile workarounds for POWER7 These are not needed on POWER7 so remove them. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cputable.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 75062cb40e7b..65813ee98b7d 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -459,10 +459,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power7", .oprofile_type = PPC_OPROFILE_POWER4, - .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV, - .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR, - .oprofile_mmcra_clear = POWER6_MMCRA_THRM | - POWER6_MMCRA_OTHER, .platform = "power7", }, { /* Cell Broadband Engine */ -- cgit v1.2.2 From 6f08cb3be6345bc354e48131f7466766db4d355a Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 17 Nov 2010 18:52:44 +0000 Subject: powerpc: Add POWER7+ cputable entry This adds the POWER7+ cputable entry for the PVR 0x004a0000. Rest is the same as vanilla POWER7. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cputable.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 65813ee98b7d..be5ab18b03b5 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -461,6 +461,22 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .platform = "power7", }, + { /* Power7+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004A0000, + .cpu_name = "POWER7+ (raw)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .mmu_features = MMU_FTR_HPTE_TABLE | + MMU_FTR_TLBIE_206, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power7", + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "power7+", + }, { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, .pvr_value = 0x00700000, -- cgit v1.2.2 From 46f5221049bb46b0188aad6b6dfab5dbc778be22 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 18 Nov 2010 15:06:17 +0000 Subject: powerpc: Remove second definition of STACK_FRAME_OVERHEAD Since STACK_FRAME_OVERHEAD is defined in asm/ptrace.h and that is ASSEMBER safe, we can just include that instead of going via asm-offsets.h. Signed-off-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 1 - arch/powerpc/kernel/entry_32.S | 1 + arch/powerpc/kernel/exceptions-64s.S | 1 + arch/powerpc/kernel/fpu.S | 1 + arch/powerpc/kernel/head_40x.S | 1 + arch/powerpc/kernel/head_44x.S | 1 + arch/powerpc/kernel/head_64.S | 1 + arch/powerpc/kernel/head_8xx.S | 1 + arch/powerpc/kernel/head_fsl_booke.S | 1 + arch/powerpc/kernel/misc_32.S | 1 + arch/powerpc/kernel/misc_64.S | 1 + arch/powerpc/kernel/ppc_save_regs.S | 1 + arch/powerpc/kernel/vector.S | 1 + 13 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index bd0df2e6aa8f..23e6a93145ab 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -209,7 +209,6 @@ int main(void) DEFINE(RTASENTRY, offsetof(struct rtas_t, entry)); /* Interrupt register frame */ - DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ed4aeb96398b..c22dc1ec1c94 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -31,6 +31,7 @@ #include #include #include +#include #undef SHOW_SYSCALLS #undef SHOW_SYSCALLS_TASK diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9f8b01d6466f..8a817995b4cd 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -13,6 +13,7 @@ */ #include +#include /* * We layout physical memory as follows: diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index e86c040ae585..de369558bf0a 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef CONFIG_VSX #define REST_32FPVSRS(n,c,base) \ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 8278e8bad5a0..9dd21a8c4d52 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -40,6 +40,7 @@ #include #include #include +#include /* As with the other PowerPC ports, it is expected that when code * execution begins here, the following registers contain valid, yet diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 562305b40a8e..cbb3436b592d 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "head_booke.h" diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index f0dd577e4a5b..ce41b97eb512 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -38,6 +38,7 @@ #include #include #include +#include /* The physical memory is layed out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1f1a04b5c2a4..1cbf64e6b416 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -29,6 +29,7 @@ #include #include #include +#include /* Macro to make the code more readable. */ #ifdef CONFIG_8xx_CPU6 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 529b817f473b..3e02710d9562 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -41,6 +41,7 @@ #include #include #include +#include #include "head_booke.h" /* As with the other PowerPC ports, it is expected that when code diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index a7a570dcdd57..094bd9821ad4 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -30,6 +30,7 @@ #include #include #include +#include .text diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index e5144906a56d..206a321a71d3 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -25,6 +25,7 @@ #include #include #include +#include .text diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 5113bd2285e1..e83ba3f078e4 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -11,6 +11,7 @@ #include #include #include +#include /* * Grab the register values as they are now. diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index fe460482fa68..9de6f396cf85 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -5,6 +5,7 @@ #include #include #include +#include /* * load_up_altivec(unused, unused, tsk) -- cgit v1.2.2 From 74d51d029818eca9d1aec22dd2895e269c0044b1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 29 Jul 2010 14:45:24 +1000 Subject: powerpc/nvram: Move things out of asm/nvram.h This moves a bunch of definitions out of asm/nvram.h to the files that use them or just outright remove completely unused stuff. We leave the partition signatures definitions, they will be useful Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 9cf197f01e94..a8154f1813df 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -34,6 +34,25 @@ #undef DEBUG_NVRAM +#define NVRAM_HEADER_LEN 16 /* sizeof(struct nvram_header) */ +#define NVRAM_BLOCK_LEN 16 +#define NVRAM_MAX_REQ (2080/NVRAM_BLOCK_LEN) +#define NVRAM_MIN_REQ (1056/NVRAM_BLOCK_LEN) + +/* If change this size, then change the size of NVNAME_LEN */ +struct nvram_header { + unsigned char signature; + unsigned char checksum; + unsigned short length; + char name[12]; +}; + +struct nvram_partition { + struct list_head partition; + struct nvram_header header; + unsigned int index; +}; + static struct nvram_partition * nvram_part; static long nvram_error_log_index = -1; static long nvram_error_log_size = 0; @@ -432,7 +451,7 @@ static int __init nvram_setup_partition(void) } /* try creating a partition with the free space we have */ - rc = nvram_create_os_partition(); + rc = nvram_create_partition("ppc64,linux", ); if (!rc) { return 0; } -- cgit v1.2.2 From 4e7c77a385efac81d6677a4a761b1b66cd2cb59e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 29 Jul 2010 15:28:20 +1000 Subject: powerpc/nvram: More flexible nvram_create_partition() Replace nvram_create_os_partition() with a variant that takes the partition name, signature and size as arguments. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 46 +++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index a8154f1813df..9e133355f742 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -307,13 +307,15 @@ static int __init nvram_remove_os_partition(void) return 0; } -/* nvram_create_os_partition - * - * Create a OS linux partition to buffer error logs. - * Will create a partition starting at the first free - * space found if space has enough room. +/** + * nvram_create_partition - Create a partition in nvram + * @name: name of the partition to create + * @sig: signature of the partition to create + * @req_size: size to allocate preferrably + * @min_size: minimum acceptable size (0 means req_size) */ -static int __init nvram_create_os_partition(void) +static int __init nvram_create_partition(const char *name, int sig, + int req_size, int min_size) { struct nvram_partition *part; struct nvram_partition *new_part; @@ -322,20 +324,27 @@ static int __init nvram_create_os_partition(void) loff_t tmp_index; long size = 0; int rc; - + + /* If no minimum size specified, make it the same as the + * requested size + */ + if (min_size == 0) + min_size = req_size; + /* Find a free partition that will give us the maximum needed size If can't find one that will give us the minimum size needed */ list_for_each_entry(part, &nvram_part->partition, partition) { if (part->header.signature != NVRAM_SIG_FREE) continue; - if (part->header.length >= NVRAM_MAX_REQ) { - size = NVRAM_MAX_REQ; + if (part->header.length >= req_size) { + size = req_size; free_part = part; break; } - if (!size && part->header.length >= NVRAM_MIN_REQ) { - size = NVRAM_MIN_REQ; + if (part->header.length > size && + part->header.length >= min_size) { + size = part->header.length; free_part = part; } } @@ -350,9 +359,9 @@ static int __init nvram_create_os_partition(void) } new_part->index = free_part->index; - new_part->header.signature = NVRAM_SIG_OS; + new_part->header.signature = sig; new_part->header.length = size; - strcpy(new_part->header.name, "ppc64,linux"); + strncpy(new_part->header.name, name, 12); new_part->header.checksum = nvram_checksum(&new_part->header); rc = nvram_write_header(new_part); @@ -451,10 +460,10 @@ static int __init nvram_setup_partition(void) } /* try creating a partition with the free space we have */ - rc = nvram_create_partition("ppc64,linux", ); - if (!rc) { + rc = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, + NVRAM_MAX_REQ, NVRAM_MIN_REQ); + if (!rc) return 0; - } /* need to free up some space */ rc = nvram_remove_os_partition(); @@ -463,9 +472,10 @@ static int __init nvram_setup_partition(void) } /* create a partition in this new space */ - rc = nvram_create_os_partition(); + rc = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, + NVRAM_MAX_REQ, NVRAM_MIN_REQ); if (rc) { - printk(KERN_ERR "nvram_create_os_partition: Could not find a " + printk(KERN_ERR "nvram_create_partition: Could not find a " "NVRAM partition large enough\n"); return rc; } -- cgit v1.2.2 From 36673307aee535f951f4eede81049c6962bc4ba9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 29 Jul 2010 18:18:44 +1000 Subject: powerpc/nvram: nvram_create_partitions() now uses bytes This converts nvram_create_partition() to use a size in bytes rather than blocks. It does the appropriate alignment internally The size passed is also the data size (ie. doesn't include the header anymore). Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 9e133355f742..a5a5587121a7 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -34,10 +34,10 @@ #undef DEBUG_NVRAM -#define NVRAM_HEADER_LEN 16 /* sizeof(struct nvram_header) */ -#define NVRAM_BLOCK_LEN 16 -#define NVRAM_MAX_REQ (2080/NVRAM_BLOCK_LEN) -#define NVRAM_MIN_REQ (1056/NVRAM_BLOCK_LEN) +#define NVRAM_HEADER_LEN sizeof(struct nvram_header) +#define NVRAM_BLOCK_LEN NVRAM_HEADER_LEN +#define NVRAM_MAX_REQ 2079 +#define NVRAM_MIN_REQ 1055 /* If change this size, then change the size of NVNAME_LEN */ struct nvram_header { @@ -311,7 +311,7 @@ static int __init nvram_remove_os_partition(void) * nvram_create_partition - Create a partition in nvram * @name: name of the partition to create * @sig: signature of the partition to create - * @req_size: size to allocate preferrably + * @req_size: size of data to allocate in bytes * @min_size: minimum acceptable size (0 means req_size) */ static int __init nvram_create_partition(const char *name, int sig, @@ -325,12 +325,20 @@ static int __init nvram_create_partition(const char *name, int sig, long size = 0; int rc; + /* Convert sizes from bytes to blocks */ + req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + /* If no minimum size specified, make it the same as the * requested size */ if (min_size == 0) min_size = req_size; + /* Now add one block to each for the header */ + req_size += 1; + min_size += 1; + /* Find a free partition that will give us the maximum needed size If can't find one that will give us the minimum size needed */ list_for_each_entry(part, &nvram_part->partition, partition) { @@ -450,7 +458,7 @@ static int __init nvram_setup_partition(void) if (strcmp(part->header.name, "ppc64,linux")) continue; - if (part->header.length >= NVRAM_MIN_REQ) { + if ((part->header.length - 1) * NVRAM_BLOCK_LEN >= NVRAM_MIN_REQ) { /* found our partition */ nvram_error_log_index = part->index + NVRAM_HEADER_LEN; nvram_error_log_size = ((part->header.length - 1) * -- cgit v1.2.2 From 578914cffc283b907777796420148d582072cbae Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 29 Jul 2010 17:21:17 +1000 Subject: powerpc/nvram: Ensure that the partition header/block size is right Use BUILD_BUG_ON to ensure the structure representing a partition header have the right size. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index a5a5587121a7..f7538820c03d 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -565,6 +565,8 @@ static int __init nvram_init(void) int error; int rc; + BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16); + if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0) return -ENODEV; -- cgit v1.2.2 From cef0d5ad62ec6e0c8456b8f58e898aa3219311a5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 29 Jul 2010 17:22:34 +1000 Subject: powerpc/nvram: Completely clear a new partition When creating a partition, we clear it entirely rather than just the first two words since the previous code was rather specific to the pseries log partition format. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index f7538820c03d..02737e687559 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -320,7 +320,7 @@ static int __init nvram_create_partition(const char *name, int sig, struct nvram_partition *part; struct nvram_partition *new_part; struct nvram_partition *free_part = NULL; - int seq_init[2] = { 0, 0 }; + static char nv_init_vals[16]; loff_t tmp_index; long size = 0; int rc; @@ -379,14 +379,15 @@ static int __init nvram_create_partition(const char *name, int sig, return rc; } - /* make sure and initialize to zero the sequence number and the error - type logged */ - tmp_index = new_part->index + NVRAM_HEADER_LEN; - rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_create_os_partition: nvram_write " - "failed (%d)\n", rc); - return rc; + /* Clear the partition */ + for (tmp_index = new_part->index + NVRAM_HEADER_LEN; + tmp_index < ((size - 1) * NVRAM_BLOCK_LEN); + tmp_index += NVRAM_BLOCK_LEN) { + rc = ppc_md.nvram_write(nv_init_vals, NVRAM_BLOCK_LEN, &tmp_index); + if (rc <= 0) { + pr_err("nvram_create_partition: nvram_write failed (%d)\n", rc); + return rc; + } } nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN; -- cgit v1.2.2 From e49e2e87235518c21b1f5228809209831e6169e7 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 29 Jul 2010 17:38:55 +1000 Subject: powerpc/nvram: Shuffle code around in nvram_create_partition() This error log stuff is really pseries specific. As a first step we move the initialization of these variables to the caller of nvram_create_partition(), which is also slightly reorganized so we setup the free partition before we clear the new partition, so the chance of an error during clear leaving us with invalid headers is lessened. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 108 +++++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 46 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 02737e687559..eabee7c61183 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -313,9 +313,15 @@ static int __init nvram_remove_os_partition(void) * @sig: signature of the partition to create * @req_size: size of data to allocate in bytes * @min_size: minimum acceptable size (0 means req_size) + * + * Returns a negative error code or a positive nvram index + * of the beginning of the data area of the newly created + * partition. If you provided a min_size smaller than req_size + * you need to query for the actual size yourself after the + * call using nvram_partition_get_size(). */ -static int __init nvram_create_partition(const char *name, int sig, - int req_size, int min_size) +static loff_t __init nvram_create_partition(const char *name, int sig, + int req_size, int min_size) { struct nvram_partition *part; struct nvram_partition *new_part; @@ -334,6 +340,8 @@ static int __init nvram_create_partition(const char *name, int sig, */ if (min_size == 0) min_size = req_size; + if (min_size > req_size) + return -EINVAL; /* Now add one block to each for the header */ req_size += 1; @@ -362,7 +370,7 @@ static int __init nvram_create_partition(const char *name, int sig, /* Create our OS partition */ new_part = kmalloc(sizeof(*new_part), GFP_KERNEL); if (!new_part) { - printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n"); + pr_err("nvram_create_os_partition: kmalloc failed\n"); return -ENOMEM; } @@ -374,12 +382,29 @@ static int __init nvram_create_partition(const char *name, int sig, rc = nvram_write_header(new_part); if (rc <= 0) { - printk(KERN_ERR "nvram_create_os_partition: nvram_write_header " - "failed (%d)\n", rc); + pr_err("nvram_create_os_partition: nvram_write_header " + "failed (%d)\n", rc); return rc; } + list_add_tail(&new_part->partition, &free_part->partition); + + /* Adjust or remove the partition we stole the space from */ + if (free_part->header.length > size) { + free_part->index += size * NVRAM_BLOCK_LEN; + free_part->header.length -= size; + free_part->header.checksum = nvram_checksum(&free_part->header); + rc = nvram_write_header(free_part); + if (rc <= 0) { + pr_err("nvram_create_os_partition: nvram_write_header " + "failed (%d)\n", rc); + return rc; + } + } else { + list_del(&free_part->partition); + kfree(free_part); + } - /* Clear the partition */ + /* Clear the new partition */ for (tmp_index = new_part->index + NVRAM_HEADER_LEN; tmp_index < ((size - 1) * NVRAM_BLOCK_LEN); tmp_index += NVRAM_BLOCK_LEN) { @@ -390,31 +415,24 @@ static int __init nvram_create_partition(const char *name, int sig, } } - nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN; - nvram_error_log_size = ((part->header.length - 1) * - NVRAM_BLOCK_LEN) - sizeof(struct err_log_info); - - list_add_tail(&new_part->partition, &free_part->partition); - - if (free_part->header.length <= size) { - list_del(&free_part->partition); - kfree(free_part); - return 0; - } + return new_part->index + NVRAM_HEADER_LEN; +} - /* Adjust the partition we stole the space from */ - free_part->index += size * NVRAM_BLOCK_LEN; - free_part->header.length -= size; - free_part->header.checksum = nvram_checksum(&free_part->header); +/** + * nvram_get_partition_size - Get the data size of an nvram partition + * @data_index: This is the offset of the start of the data of + * the partition. The same value that is returned by + * nvram_create_partition(). + */ +static int nvram_get_partition_size(loff_t data_index) +{ + struct nvram_partition *part; - rc = nvram_write_header(free_part); - if (rc <= 0) { - printk(KERN_ERR "nvram_create_os_partition: nvram_write_header " - "failed (%d)\n", rc); - return rc; + list_for_each_entry(part, &nvram_part->partition, partition) { + if (part->index + NVRAM_HEADER_LEN == data_index) + return (part->header.length - 1) * NVRAM_BLOCK_LEN; } - - return 0; + return -1; } @@ -469,30 +487,28 @@ static int __init nvram_setup_partition(void) } /* try creating a partition with the free space we have */ - rc = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, + rc = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, NVRAM_MAX_REQ, NVRAM_MIN_REQ); - if (!rc) - return 0; - - /* need to free up some space */ - rc = nvram_remove_os_partition(); - if (rc) { - return rc; - } - - /* create a partition in this new space */ - rc = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, - NVRAM_MAX_REQ, NVRAM_MIN_REQ); - if (rc) { - printk(KERN_ERR "nvram_create_partition: Could not find a " - "NVRAM partition large enough\n"); - return rc; + if (rc < 0) { + /* need to free up some space */ + rc = nvram_remove_os_partition(); + if (rc) + return rc; + /* create a partition in this new space */ + rc = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, + NVRAM_MAX_REQ, NVRAM_MIN_REQ); + if (rc < 0) { + pr_err("nvram_create_partition: Could not find" + " enough space in NVRAM for partition\n"); + return rc; + } } + nvram_error_log_index = rc; + nvram_error_log_size = nvram_get_partition_size(rc) - sizeof(struct err_log_info); return 0; } - static int __init nvram_scan_partitions(void) { loff_t cur_index = 0; -- cgit v1.2.2 From fa2b4e54d41f3c9f1bee6a7d63ecd4f0ab161e89 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 29 Jul 2010 18:19:59 +1000 Subject: powerpc/nvram: Improve partition removal Existing code is nasty, has bugs etc... rewrite the function more simply, and make it take the signature and optional name of the partitions to remove as arguments, thus making it a more generic utility. We also try to remove a log partition that we find and is too small rather than creating a duplicate. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 91 ++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 48 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index eabee7c61183..6dd2700852f0 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -247,61 +247,54 @@ static unsigned char __init nvram_checksum(struct nvram_header *p) return c_sum; } -static int __init nvram_remove_os_partition(void) +/** + * nvram_remove_partition - Remove one or more partitions in nvram + * @name: name of the partition to remove, or NULL for a + * signature only match + * @sig: signature of the partition(s) to remove + */ + +static int __init nvram_remove_partition(const char *name, int sig) { - struct list_head *i; - struct list_head *j; - struct nvram_partition * part; - struct nvram_partition * cur_part; + struct nvram_partition *part, *prev, *tmp; int rc; - list_for_each(i, &nvram_part->partition) { - part = list_entry(i, struct nvram_partition, partition); - if (part->header.signature != NVRAM_SIG_OS) + list_for_each_entry(part, &nvram_part->partition, partition) { + if (part->header.signature != sig) continue; - - /* Make os partition a free partition */ + if (name && strncmp(name, part->header.name, 12)) + continue; + + /* Make partition a free partition */ part->header.signature = NVRAM_SIG_FREE; sprintf(part->header.name, "wwwwwwwwwwww"); part->header.checksum = nvram_checksum(&part->header); - - /* Merge contiguous free partitions backwards */ - list_for_each_prev(j, &part->partition) { - cur_part = list_entry(j, struct nvram_partition, partition); - if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) { - break; - } - - part->header.length += cur_part->header.length; - part->header.checksum = nvram_checksum(&part->header); - part->index = cur_part->index; - - list_del(&cur_part->partition); - kfree(cur_part); - j = &part->partition; /* fixup our loop */ - } - - /* Merge contiguous free partitions forwards */ - list_for_each(j, &part->partition) { - cur_part = list_entry(j, struct nvram_partition, partition); - if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) { - break; - } - - part->header.length += cur_part->header.length; - part->header.checksum = nvram_checksum(&part->header); - - list_del(&cur_part->partition); - kfree(cur_part); - j = &part->partition; /* fixup our loop */ - } - rc = nvram_write_header(part); if (rc <= 0) { - printk(KERN_ERR "nvram_remove_os_partition: nvram_write failed (%d)\n", rc); + printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc); return rc; } + } + /* Merge contiguous ones */ + prev = NULL; + list_for_each_entry_safe(part, tmp, &nvram_part->partition, partition) { + if (part->header.signature != NVRAM_SIG_FREE) { + prev = NULL; + continue; + } + if (prev) { + prev->header.length += part->header.length; + prev->header.checksum = nvram_checksum(&part->header); + rc = nvram_write_header(part); + if (rc <= 0) { + printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc); + return rc; + } + list_del(&part->partition); + kfree(part); + } else + prev = part; } return 0; @@ -484,17 +477,19 @@ static int __init nvram_setup_partition(void) NVRAM_BLOCK_LEN) - sizeof(struct err_log_info); return 0; } + + /* Found one but it's too small, remove it */ + nvram_remove_partition("ppc64,linux", NVRAM_SIG_OS); } /* try creating a partition with the free space we have */ rc = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, NVRAM_MAX_REQ, NVRAM_MIN_REQ); if (rc < 0) { - /* need to free up some space */ - rc = nvram_remove_os_partition(); - if (rc) - return rc; - /* create a partition in this new space */ + /* need to free up some space, remove any "OS" partition */ + nvram_remove_partition(NULL, NVRAM_SIG_OS); + + /* Try again */ rc = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, NVRAM_MAX_REQ, NVRAM_MIN_REQ); if (rc < 0) { -- cgit v1.2.2 From cf5cbf9f8085eb45316d6e3c888a77cc50696701 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 2 Aug 2010 10:01:58 +1000 Subject: powerpc/nvram: Add nvram_find_partition() Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 6dd2700852f0..01e6844be8d7 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -429,6 +429,28 @@ static int nvram_get_partition_size(loff_t data_index) } +/** + * nvram_find_partition - Find an nvram partition by signature and name + * @name: Name of the partition or NULL for any name + * @sig: Signature to test against + * @out_size: if non-NULL, returns the size of the data part of the partition + */ +loff_t nvram_find_partition(const char *name, int sig, int *out_size) +{ + struct nvram_partition *p; + + list_for_each_entry(p, &nvram_part->partition, partition) { + if (p->header.signature == sig && + (!name || !strncmp(p->header.name, name, 12))) { + if (out_size) + *out_size = (p->header.length - 1) * + NVRAM_BLOCK_LEN; + return p->index + NVRAM_HEADER_LEN; + } + } + return 0; +} + /* nvram_setup_partition * * This will setup the partition we need for buffering the -- cgit v1.2.2 From d9626947f20b3dc0992e4ac28b477f7601f8f16e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 2 Aug 2010 10:13:56 +1000 Subject: powerpc/nvram: Change nvram_setup_partition() to use new helper This changes the function to use nvram_find_partition() instead of doing the lookup "by hand". It also makes some of the logic clearer and prints out more useful diagnostic information. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 71 +++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 39 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 01e6844be8d7..76f546b9944d 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -469,9 +469,8 @@ loff_t nvram_find_partition(const char *name, int sig, int *out_size) */ static int __init nvram_setup_partition(void) { - struct list_head * p; - struct nvram_partition * part; - int rc; + loff_t p; + int size; /* For now, we don't do any of this on pmac, until I * have figured out if it's worth killing some unused stuffs @@ -481,48 +480,42 @@ static int __init nvram_setup_partition(void) if (machine_is(powermac)) return -ENOSPC; - /* see if we have an OS partition that meets our needs. - will try getting the max we need. If not we'll delete - partitions and try again. */ - list_for_each(p, &nvram_part->partition) { - part = list_entry(p, struct nvram_partition, partition); - if (part->header.signature != NVRAM_SIG_OS) - continue; - - if (strcmp(part->header.name, "ppc64,linux")) - continue; - - if ((part->header.length - 1) * NVRAM_BLOCK_LEN >= NVRAM_MIN_REQ) { - /* found our partition */ - nvram_error_log_index = part->index + NVRAM_HEADER_LEN; - nvram_error_log_size = ((part->header.length - 1) * - NVRAM_BLOCK_LEN) - sizeof(struct err_log_info); - return 0; - } + p = nvram_find_partition("ppc64,linux", NVRAM_SIG_OS, &size); - /* Found one but it's too small, remove it */ + /* Found one but too small, remove it */ + if (p && size < NVRAM_MIN_REQ) { + pr_info("nvram: Found too small ppc64,linux partition" + ",removing it..."); nvram_remove_partition("ppc64,linux", NVRAM_SIG_OS); + p = 0; } - - /* try creating a partition with the free space we have */ - rc = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, - NVRAM_MAX_REQ, NVRAM_MIN_REQ); - if (rc < 0) { - /* need to free up some space, remove any "OS" partition */ - nvram_remove_partition(NULL, NVRAM_SIG_OS); - - /* Try again */ - rc = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, - NVRAM_MAX_REQ, NVRAM_MIN_REQ); - if (rc < 0) { - pr_err("nvram_create_partition: Could not find" - " enough space in NVRAM for partition\n"); - return rc; + + /* Create one if we didn't find */ + if (!p) { + p = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, + NVRAM_MAX_REQ, NVRAM_MIN_REQ); + /* No room for it, try to get rid of any OS partition + * and try again + */ + if (p == -ENOSPC) { + pr_info("nvram: No room to create ppc64,linux" + " partition, deleting all OS partitions..."); + nvram_remove_partition(NULL, NVRAM_SIG_OS); + p = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, + NVRAM_MAX_REQ, NVRAM_MIN_REQ); } } + + if (p <= 0) { + pr_err("nvram: Failed to find or create ppc64,linux" + " partition, err %d\n", (int)p); + return 0; + } + + nvram_error_log_index = p; + nvram_error_log_size = nvram_get_partition_size(p) - + sizeof(struct err_log_info); - nvram_error_log_index = rc; - nvram_error_log_size = nvram_get_partition_size(rc) - sizeof(struct err_log_info); return 0; } -- cgit v1.2.2 From edc79a2f3ee1c74d915f6a0ce3cb22bf468f5ad5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 2 Aug 2010 11:18:09 +1000 Subject: powerpc/nvram: Move the log partition stuff to pseries The nvram log partition stuff currently in nvram_64.c is really pseries specific. It isn't actually used on anything else (despite the fact that we ran the code to setup the partition on anything except powermac) and the log format is specific to pseries RTAS implementation. So move it where it belongs Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 254 +++-------------------------------------- 1 file changed, 18 insertions(+), 236 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 76f546b9944d..125d86cf0afc 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -36,8 +36,6 @@ #define NVRAM_HEADER_LEN sizeof(struct nvram_header) #define NVRAM_BLOCK_LEN NVRAM_HEADER_LEN -#define NVRAM_MAX_REQ 2079 -#define NVRAM_MIN_REQ 1055 /* If change this size, then change the size of NVNAME_LEN */ struct nvram_header { @@ -54,13 +52,6 @@ struct nvram_partition { }; static struct nvram_partition * nvram_part; -static long nvram_error_log_index = -1; -static long nvram_error_log_size = 0; - -struct err_log_info { - int error_type; - unsigned int seq_num; -}; static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin) { @@ -254,7 +245,7 @@ static unsigned char __init nvram_checksum(struct nvram_header *p) * @sig: signature of the partition(s) to remove */ -static int __init nvram_remove_partition(const char *name, int sig) +int __init nvram_remove_partition(const char *name, int sig) { struct nvram_partition *part, *prev, *tmp; int rc; @@ -313,8 +304,8 @@ static int __init nvram_remove_partition(const char *name, int sig) * you need to query for the actual size yourself after the * call using nvram_partition_get_size(). */ -static loff_t __init nvram_create_partition(const char *name, int sig, - int req_size, int min_size) +loff_t __init nvram_create_partition(const char *name, int sig, + int req_size, int min_size) { struct nvram_partition *part; struct nvram_partition *new_part; @@ -417,7 +408,7 @@ static loff_t __init nvram_create_partition(const char *name, int sig, * the partition. The same value that is returned by * nvram_create_partition(). */ -static int nvram_get_partition_size(loff_t data_index) +int nvram_get_partition_size(loff_t data_index) { struct nvram_partition *part; @@ -451,75 +442,7 @@ loff_t nvram_find_partition(const char *name, int sig, int *out_size) return 0; } -/* nvram_setup_partition - * - * This will setup the partition we need for buffering the - * error logs and cleanup partitions if needed. - * - * The general strategy is the following: - * 1.) If there is ppc64,linux partition large enough then use it. - * 2.) If there is not a ppc64,linux partition large enough, search - * for a free partition that is large enough. - * 3.) If there is not a free partition large enough remove - * _all_ OS partitions and consolidate the space. - * 4.) Will first try getting a chunk that will satisfy the maximum - * error log size (NVRAM_MAX_REQ). - * 5.) If the max chunk cannot be allocated then try finding a chunk - * that will satisfy the minum needed (NVRAM_MIN_REQ). - */ -static int __init nvram_setup_partition(void) -{ - loff_t p; - int size; - - /* For now, we don't do any of this on pmac, until I - * have figured out if it's worth killing some unused stuffs - * in our nvram, as Apple defined partitions use pretty much - * all of the space - */ - if (machine_is(powermac)) - return -ENOSPC; - - p = nvram_find_partition("ppc64,linux", NVRAM_SIG_OS, &size); - - /* Found one but too small, remove it */ - if (p && size < NVRAM_MIN_REQ) { - pr_info("nvram: Found too small ppc64,linux partition" - ",removing it..."); - nvram_remove_partition("ppc64,linux", NVRAM_SIG_OS); - p = 0; - } - - /* Create one if we didn't find */ - if (!p) { - p = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, - NVRAM_MAX_REQ, NVRAM_MIN_REQ); - /* No room for it, try to get rid of any OS partition - * and try again - */ - if (p == -ENOSPC) { - pr_info("nvram: No room to create ppc64,linux" - " partition, deleting all OS partitions..."); - nvram_remove_partition(NULL, NVRAM_SIG_OS); - p = nvram_create_partition("ppc64,linux", NVRAM_SIG_OS, - NVRAM_MAX_REQ, NVRAM_MIN_REQ); - } - } - - if (p <= 0) { - pr_err("nvram: Failed to find or create ppc64,linux" - " partition, err %d\n", (int)p); - return 0; - } - - nvram_error_log_index = p; - nvram_error_log_size = nvram_get_partition_size(p) - - sizeof(struct err_log_info); - - return 0; -} - -static int __init nvram_scan_partitions(void) +int __init nvram_scan_partitions(void) { loff_t cur_index = 0; struct nvram_header phead; @@ -529,7 +452,15 @@ static int __init nvram_scan_partitions(void) int total_size; int err; - if (ppc_md.nvram_size == NULL) + /* Initialize our anchor for the nvram partition list */ + nvram_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + if (!nvram_part) { + printk(KERN_ERR "nvram_init: Failed kmalloc\n"); + return -ENOMEM; + } + INIT_LIST_HEAD(&nvram_part->partition); + + if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0) return -ENODEV; total_size = ppc_md.nvram_size(); @@ -582,6 +513,10 @@ static int __init nvram_scan_partitions(void) } err = 0; +#ifdef DEBUG_NVRAM + nvram_print_partitions("NVRAM Partitions"); +#endif + out: kfree(header); return err; @@ -589,7 +524,6 @@ static int __init nvram_scan_partitions(void) static int __init nvram_init(void) { - int error; int rc; BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16); @@ -603,29 +537,6 @@ static int __init nvram_init(void) return rc; } - /* initialize our anchor for the nvram partition list */ - nvram_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); - if (!nvram_part) { - printk(KERN_ERR "nvram_init: Failed kmalloc\n"); - return -ENOMEM; - } - INIT_LIST_HEAD(&nvram_part->partition); - - /* Get all the NVRAM partitions */ - error = nvram_scan_partitions(); - if (error) { - printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n"); - return error; - } - - if(nvram_setup_partition()) - printk(KERN_WARNING "nvram_init: Could not find nvram partition" - " for nvram buffered error logging.\n"); - -#ifdef DEBUG_NVRAM - nvram_print_partitions("NVRAM Partitions"); -#endif - return rc; } @@ -634,135 +545,6 @@ void __exit nvram_cleanup(void) misc_deregister( &nvram_dev ); } - -#ifdef CONFIG_PPC_PSERIES - -/* nvram_write_error_log - * - * We need to buffer the error logs into nvram to ensure that we have - * the failure information to decode. If we have a severe error there - * is no way to guarantee that the OS or the machine is in a state to - * get back to user land and write the error to disk. For example if - * the SCSI device driver causes a Machine Check by writing to a bad - * IO address, there is no way of guaranteeing that the device driver - * is in any state that is would also be able to write the error data - * captured to disk, thus we buffer it in NVRAM for analysis on the - * next boot. - * - * In NVRAM the partition containing the error log buffer will looks like: - * Header (in bytes): - * +-----------+----------+--------+------------+------------------+ - * | signature | checksum | length | name | data | - * |0 |1 |2 3|4 15|16 length-1| - * +-----------+----------+--------+------------+------------------+ - * - * The 'data' section would look like (in bytes): - * +--------------+------------+-----------------------------------+ - * | event_logged | sequence # | error log | - * |0 3|4 7|8 nvram_error_log_size-1| - * +--------------+------------+-----------------------------------+ - * - * event_logged: 0 if event has not been logged to syslog, 1 if it has - * sequence #: The unique sequence # for each event. (until it wraps) - * error log: The error log from event_scan - */ -int nvram_write_error_log(char * buff, int length, - unsigned int err_type, unsigned int error_log_cnt) -{ - int rc; - loff_t tmp_index; - struct err_log_info info; - - if (nvram_error_log_index == -1) { - return -ESPIPE; - } - - if (length > nvram_error_log_size) { - length = nvram_error_log_size; - } - - info.error_type = err_type; - info.seq_num = error_log_cnt; - - tmp_index = nvram_error_log_index; - - rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc); - return rc; - } - - rc = ppc_md.nvram_write(buff, length, &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc); - return rc; - } - - return 0; -} - -/* nvram_read_error_log - * - * Reads nvram for error log for at most 'length' - */ -int nvram_read_error_log(char * buff, int length, - unsigned int * err_type, unsigned int * error_log_cnt) -{ - int rc; - loff_t tmp_index; - struct err_log_info info; - - if (nvram_error_log_index == -1) - return -1; - - if (length > nvram_error_log_size) - length = nvram_error_log_size; - - tmp_index = nvram_error_log_index; - - rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); - return rc; - } - - rc = ppc_md.nvram_read(buff, length, &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); - return rc; - } - - *error_log_cnt = info.seq_num; - *err_type = info.error_type; - - return 0; -} - -/* This doesn't actually zero anything, but it sets the event_logged - * word to tell that this event is safely in syslog. - */ -int nvram_clear_error_log(void) -{ - loff_t tmp_index; - int clear_word = ERR_FLAG_ALREADY_LOGGED; - int rc; - - if (nvram_error_log_index == -1) - return -1; - - tmp_index = nvram_error_log_index; - - rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc); - return rc; - } - - return 0; -} - -#endif /* CONFIG_PPC_PSERIES */ - module_init(nvram_init); module_exit(nvram_cleanup); MODULE_LICENSE("GPL"); -- cgit v1.2.2 From 690d1a9bd14bd861328ca66473a223f60cf1ad31 Mon Sep 17 00:00:00 2001 From: Jim Keniston Date: Thu, 11 Nov 2010 18:54:22 +0000 Subject: powerpc/nvram: Fix NVRAM partition list setup Simplify creation and use of the NVRAM partition list. Signed-off-by: Jim Keniston Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 125d86cf0afc..b8a50fa5875b 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -51,7 +51,7 @@ struct nvram_partition { unsigned int index; }; -static struct nvram_partition * nvram_part; +static LIST_HEAD(nvram_partitions); static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin) { @@ -196,13 +196,11 @@ static struct miscdevice nvram_dev = { #ifdef DEBUG_NVRAM static void __init nvram_print_partitions(char * label) { - struct list_head * p; struct nvram_partition * tmp_part; printk(KERN_WARNING "--------%s---------\n", label); printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n"); - list_for_each(p, &nvram_part->partition) { - tmp_part = list_entry(p, struct nvram_partition, partition); + list_for_each_entry(tmp_part, &nvram_partitions, partition) { printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%s\n", tmp_part->index, tmp_part->header.signature, tmp_part->header.checksum, tmp_part->header.length, @@ -250,7 +248,7 @@ int __init nvram_remove_partition(const char *name, int sig) struct nvram_partition *part, *prev, *tmp; int rc; - list_for_each_entry(part, &nvram_part->partition, partition) { + list_for_each_entry(part, &nvram_partitions, partition) { if (part->header.signature != sig) continue; if (name && strncmp(name, part->header.name, 12)) @@ -269,7 +267,7 @@ int __init nvram_remove_partition(const char *name, int sig) /* Merge contiguous ones */ prev = NULL; - list_for_each_entry_safe(part, tmp, &nvram_part->partition, partition) { + list_for_each_entry_safe(part, tmp, &nvram_partitions, partition) { if (part->header.signature != NVRAM_SIG_FREE) { prev = NULL; continue; @@ -333,7 +331,7 @@ loff_t __init nvram_create_partition(const char *name, int sig, /* Find a free partition that will give us the maximum needed size If can't find one that will give us the minimum size needed */ - list_for_each_entry(part, &nvram_part->partition, partition) { + list_for_each_entry(part, &nvram_partitions, partition) { if (part->header.signature != NVRAM_SIG_FREE) continue; @@ -412,7 +410,7 @@ int nvram_get_partition_size(loff_t data_index) { struct nvram_partition *part; - list_for_each_entry(part, &nvram_part->partition, partition) { + list_for_each_entry(part, &nvram_partitions, partition) { if (part->index + NVRAM_HEADER_LEN == data_index) return (part->header.length - 1) * NVRAM_BLOCK_LEN; } @@ -430,7 +428,7 @@ loff_t nvram_find_partition(const char *name, int sig, int *out_size) { struct nvram_partition *p; - list_for_each_entry(p, &nvram_part->partition, partition) { + list_for_each_entry(p, &nvram_partitions, partition) { if (p->header.signature == sig && (!name || !strncmp(p->header.name, name, 12))) { if (out_size) @@ -452,14 +450,6 @@ int __init nvram_scan_partitions(void) int total_size; int err; - /* Initialize our anchor for the nvram partition list */ - nvram_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); - if (!nvram_part) { - printk(KERN_ERR "nvram_init: Failed kmalloc\n"); - return -ENOMEM; - } - INIT_LIST_HEAD(&nvram_part->partition); - if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0) return -ENODEV; total_size = ppc_md.nvram_size(); @@ -507,7 +497,7 @@ int __init nvram_scan_partitions(void) memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN); tmp_part->index = cur_index; - list_add_tail(&tmp_part->partition, &nvram_part->partition); + list_add_tail(&tmp_part->partition, &nvram_partitions); cur_index += phead.length * NVRAM_BLOCK_LEN; } -- cgit v1.2.2 From 6024ede9ba84aa1b891c2d6bc98eda07801235e5 Mon Sep 17 00:00:00 2001 From: Jim Keniston Date: Thu, 11 Nov 2010 18:54:27 +0000 Subject: powerpc/nvram: Handle partition names >= 12 chars The name field in the nvram_header can be < 12 chars, null-terminated, or 12 chars without the null. Handle this safely. Signed-off-by: Jim Keniston Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index b8a50fa5875b..bb12b3248f13 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -42,6 +42,7 @@ struct nvram_header { unsigned char signature; unsigned char checksum; unsigned short length; + /* Terminating null required only for names < 12 chars. */ char name[12]; }; @@ -201,7 +202,7 @@ static void __init nvram_print_partitions(char * label) printk(KERN_WARNING "--------%s---------\n", label); printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n"); list_for_each_entry(tmp_part, &nvram_partitions, partition) { - printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%s\n", + printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%12s\n", tmp_part->index, tmp_part->header.signature, tmp_part->header.checksum, tmp_part->header.length, tmp_part->header.name); @@ -256,7 +257,7 @@ int __init nvram_remove_partition(const char *name, int sig) /* Make partition a free partition */ part->header.signature = NVRAM_SIG_FREE; - sprintf(part->header.name, "wwwwwwwwwwww"); + strncpy(part->header.name, "wwwwwwwwwwww", 12); part->header.checksum = nvram_checksum(&part->header); rc = nvram_write_header(part); if (rc <= 0) { -- cgit v1.2.2 From b3c73856ae47d43d0d181f9de1c1c6c0820c4515 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Mon, 18 Oct 2010 07:27:04 +0000 Subject: powerpc/iommu: Use coherent_dma_mask for alloc_coherent The IOMMU code has been passing the dma-mask instead of the coherent_dma_mask to the iommu allocator. Coherent allocations should be made using the coherent_dma_mask. Also update the vio code to ensure the coherent_dma_mask is set. Without this change drivers, such as ibmvscsi, fail to load with the corrected dma_iommu_alloc_coherent(). Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma-iommu.c | 2 +- arch/powerpc/kernel/vio.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 6e54a0fd31aa..e7554154a6de 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -19,7 +19,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, - dma_handle, device_to_mask(dev), flag, + dma_handle, dev->coherent_dma_mask, flag, dev_to_node(dev)); } diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index b2654058f2e4..1b695fdc362b 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1257,6 +1257,10 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) viodev->dev.parent = &vio_bus_device.dev; viodev->dev.bus = &vio_bus_type; viodev->dev.release = vio_dev_release; + /* needed to ensure proper operation of coherent allocations + * later, in case driver doesn't set it explicitly */ + dma_set_mask(&viodev->dev, DMA_BIT_MASK(64)); + dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64)); /* register with generic device framework */ if (device_register(&viodev->dev)) { -- cgit v1.2.2 From 8f4da26e9bf89f54b68d5cc3f3596f45e5f43911 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 8 Dec 2010 00:55:03 +0000 Subject: powerpc: Fix incorrect comment about interrupt stack allocation We now allow interrupt stacks anywhere in the first segment which can be 256M or 1TB. Fix the comment. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index ce6f61c6f871..5a0401fcaebd 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -437,8 +437,8 @@ static void __init irqstack_early_init(void) unsigned int i; /* - * interrupt stacks must be under 256MB, we cannot afford to take - * SLB misses on them. + * Interrupt stacks must be in the first segment since we + * cannot afford to take SLB misses on them. */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) -- cgit v1.2.2 From 928a31978115b48ad634a97755915e8ab23c4749 Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Thu, 18 Nov 2010 00:35:07 +0000 Subject: Powerpc: separate CONFIG_RELOCATABLE from CONFIG_CRASHDUMP in boot code Fix head_64.S so that we can build a relocatable kernel that isn't necessarily a crash-dump kernel Signed-off-by: Milton Miller Signed-off-by: Sonny Rao Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ce41b97eb512..782f23df7c85 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -97,7 +97,7 @@ __secondary_hold_acknowledge: .llong hvReleaseData-KERNELBASE #endif /* CONFIG_PPC_ISERIES */ -#ifdef CONFIG_CRASH_DUMP +#ifdef CONFIG_RELOCATABLE /* This flag is set to 1 by a loader if the kernel should run * at the loaded address instead of the linked address. This * is used by kexec-tools to keep the the kdump kernel in the @@ -385,12 +385,10 @@ _STATIC(__after_prom_start) /* process relocations for the final address of the kernel */ lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */ sldi r25,r25,32 -#ifdef CONFIG_CRASH_DUMP lwz r7,__run_at_load-_stext(r26) - cmplwi cr0,r7,1 /* kdump kernel ? - stay where we are */ + cmplwi cr0,r7,1 /* flagged to stay where we are ? */ bne 1f add r25,r25,r26 -#endif 1: mr r3,r25 bl .relocate #endif -- cgit v1.2.2 From 364a1246522f99cbe58040e99af007ada31034ed Mon Sep 17 00:00:00 2001 From: Heiko Schocher Date: Mon, 22 Nov 2010 21:30:33 +0000 Subject: powerpc/time: printk time stamp init not correct problem: I see sometimes on my mpc5200 based board such printk timing information: [ 0.000000] NR_IRQS:512 nr_irqs:512 16 [ 0.000000] MPC52xx PIC is up and running! [ 0.000000] clocksource: timebase mult[79364d9] shift[22] registered [ 0.000000] console [ttyPSC0] enabled [ 130.300633] pid_max: default: 32768 minimum: 301 [ 130.305647] Mount-cache hash table entries: 512 [ 130.315818] NET: Registered protocol family 16 reason: if the tbu not starts from 0 when linux boots, boot_tb maybe could not store the real 64 bit tbu value, because boot_tp is only a 32 bit unsigned long. solution: change boot_tb to u64 [BenH: Made it u64 instead of unsigned long long] Signed-off-by: Heiko Schocher cc: Wolfgang Denk Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 010406958d97..09e4dea4a85a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -155,7 +155,7 @@ EXPORT_SYMBOL_GPL(rtc_lock); static u64 tb_to_ns_scale __read_mostly; static unsigned tb_to_ns_shift __read_mostly; -static unsigned long boot_tb __read_mostly; +static u64 boot_tb __read_mostly; extern struct timezone sys_tz; static long timezone_offset; -- cgit v1.2.2 From 4dfbf290aeb9d63a058d9d8237203b0b72bfbbe3 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sat, 27 Nov 2010 14:24:53 +0000 Subject: powerpc: Fix PPC_PTRACE_SETHWDEBUG on PPC_BOOK3S Properly set the DABR_TRANSLATION/DABR_DATA_READ/DABR_DATA_READ bits in the dabr when setting the debug register via PPC_PTRACE_SETHWDEBUG. Also don't reject trigger type of PPC_BREAKPOINT_TRIGGER_READ. Signed-off-by: Andreas Schwab Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ptrace.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index a9b32967cff6..906536998291 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1316,6 +1316,10 @@ static int set_dac_range(struct task_struct *child, static long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) { +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dabr; +#endif + if (bp_info->version != 1) return -ENOTSUPP; #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -1353,11 +1357,10 @@ static long ppc_set_hwdebug(struct task_struct *child, /* * We only support one data breakpoint */ - if (((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0) || - ((bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0) || - (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_WRITE) || - (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) || - (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) + if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 || + (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 || + bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT || + bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) return -EINVAL; if (child->thread.dabr) @@ -1366,7 +1369,14 @@ static long ppc_set_hwdebug(struct task_struct *child, if ((unsigned long)bp_info->addr >= TASK_SIZE) return -EIO; - child->thread.dabr = (unsigned long)bp_info->addr; + dabr = (unsigned long)bp_info->addr & ~7UL; + dabr |= DABR_TRANSLATION; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dabr |= DABR_DATA_READ; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dabr |= DABR_DATA_WRITE; + + child->thread.dabr = dabr; return 1; #endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */ -- cgit v1.2.2 From bb2c458b8b8c8a53f65b78051d22a0f13d53abcb Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sun, 28 Nov 2010 06:33:14 +0000 Subject: powerpc: Update compat_arch_ptrace Update compat_arch_ptrace to follow recent changes in PTRACE_GET_DEBUGREG and the addition of PPC_PTRACE_{GETHWDBGINFO|{SET|DEL}HWDEBUG}. The latter three can be forwarded to arch_ptrace unchanged. Signed-off-by: Andreas Schwab Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ptrace32.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 8a6daf4129f6..69c4be917d07 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -280,7 +280,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, /* We only support one DABR and no IABRS at the moment */ if (addr > 0) break; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + ret = put_user(child->thread.dac1, (u32 __user *)data); +#else ret = put_user(child->thread.dabr, (u32 __user *)data); +#endif break; } @@ -312,6 +316,9 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, case PTRACE_SET_DEBUGREG: case PTRACE_SYSCALL: case PTRACE_CONT: + case PPC_PTRACE_GETHWDBGINFO: + case PPC_PTRACE_SETHWDEBUG: + case PPC_PTRACE_DELHWDEBUG: ret = arch_ptrace(child, request, addr, data); break; -- cgit v1.2.2 From 4dfa9c474859629a2c4a3f8d29804d6a6c994908 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 7 Dec 2010 14:36:05 +0000 Subject: powerpc: iommu: Add device name to iommu error printks Right now its difficult to see which device is running out of iommu space: iommu_alloc failed, tbl c00000076e096660 vaddr c000000768806600 npages 1 Use dev_info() so we get the device name and location: ipr 0000:00:01.0: iommu_alloc failed, tbl c00000076e096660 vaddr c000000768806600 npages 1 Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/iommu.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index d5839179ec77..961bb03413f3 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -311,8 +311,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Handle failure */ if (unlikely(entry == DMA_ERROR_CODE)) { if (printk_ratelimit()) - printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx" - " npages %lx\n", tbl, vaddr, npages); + dev_info(dev, "iommu_alloc failed, tbl %p " + "vaddr %lx npages %lu\n", tbl, vaddr, + npages); goto failure; } @@ -579,9 +580,9 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, attrs); if (dma_handle == DMA_ERROR_CODE) { if (printk_ratelimit()) { - printk(KERN_INFO "iommu_alloc failed, " - "tbl %p vaddr %p npages %d\n", - tbl, vaddr, npages); + dev_info(dev, "iommu_alloc failed, tbl %p " + "vaddr %p npages %d\n", tbl, vaddr, + npages); } } else dma_handle |= (uaddr & ~IOMMU_PAGE_MASK); @@ -627,7 +628,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, * the tce tables. */ if (order >= IOMAP_MAX_ORDER) { - printk("iommu_alloc_consistent size too large: 0x%lx\n", size); + dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n", + size); return NULL; } -- cgit v1.2.2 From 3b7a27db3b6b5501e3d1c1628e6d5a547ffe76c6 Mon Sep 17 00:00:00 2001 From: Jesse Larrew Date: Wed, 1 Dec 2010 12:31:26 +0000 Subject: powerpc: Disable VPHN polling during a suspend operation Tie the polling mechanism into the ibm,suspend-me rtas call to stop/restart polling before/after a suspend, hibernate, migrate, or checkpoint restart operation. This ensures that the system has a chance to disable the polling if the partition is migrated to a system that does not support VPHN (and vice versa). Signed-off-by: Jesse Larrew Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtas.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8fe8bc61c10a..2097f2b3cba8 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -41,6 +41,7 @@ #include #include #include +#include struct rtas_t rtas = { .lock = __ARCH_SPIN_LOCK_UNLOCKED @@ -713,6 +714,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w int cpu; slb_set_size(SLB_MIN_SIZE); + stop_topology_update(); printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) && @@ -728,6 +730,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w rc = atomic_read(&data->error); atomic_set(&data->error, rc); + start_topology_update(); if (wake_when_done) { atomic_set(&data->done, 1); -- cgit v1.2.2 From 518fdae26a530d3f0f11e3650348ab75e5891cfd Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 12 Nov 2010 14:49:19 +0000 Subject: powerpc/pci: Use printf extension %pR for struct resource Using %pR standardizes the struct resource output. Signed-off-by: Joe Perches Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci_64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index d43fc65749c1..851577608a78 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -193,8 +193,7 @@ int __devinit pcibios_map_io_space(struct pci_bus *bus) hose->io_resource.start += io_virt_offset; hose->io_resource.end += io_virt_offset; - pr_debug(" hose->io_resource=0x%016llx...0x%016llx\n", - hose->io_resource.start, hose->io_resource.end); + pr_debug(" hose->io_resource=%pR\n", &hose->io_resource); return 0; } -- cgit v1.2.2 From 2e80a82a49c4c7eca4e35734380f28298ba5db19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Nov 2010 23:17:36 +0100 Subject: perf: Dynamic pmu types Extend the perf_pmu_register() interface to allow for named and dynamic pmu types. Because we need to support the existing static types we cannot use dynamic types for everything, hence provide a type argument. If we want to enumerate the PMUs they need a name, provide one. Signed-off-by: Peter Zijlstra LKML-Reference: <20101117222056.259707703@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 2 +- arch/powerpc/kernel/perf_event_fsl_emb.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 3129c855933c..567480705789 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1379,7 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ - perf_pmu_register(&power_pmu); + perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 7ecca59ddf77..4dcf5f831e9d 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -681,7 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); - perf_pmu_register(&fsl_emb_pmu); + perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW); return 0; } -- cgit v1.2.2 From b5fb0cc7f1c90e3b00d40b64681efcbf8bcdeb9e Mon Sep 17 00:00:00 2001 From: Shaohui Xie Date: Wed, 3 Nov 2010 17:36:37 +0800 Subject: powerpc/fsl_rio: Fix non-standard HID1 register access Moved setting of RFXE bit so we get machine checks on RIO errors into cpu_setup so that the RIO code isn't core specific. Signed-off-by: Shaohui Xie Cc: Li Yang Cc: Roy Zang Cc: Alexandre Bounine Signed-off-by: Kumar Gala --- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 894e64fa481e..5c518ad3445c 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -64,6 +64,12 @@ _GLOBAL(__setup_cpu_e500v2) bl __e500_icache_setup bl __e500_dcache_setup bl __setup_e500_ivors +#ifdef CONFIG_RAPIDIO + /* Ensure that RFXE is set */ + mfspr r3,SPRN_HID1 + oris r3,r3,HID1_RFXE@h + mtspr SPRN_HID1,r3 +#endif mtlr r4 blr _GLOBAL(__setup_cpu_e500mc) -- cgit v1.2.2 From 672c54466d24994eb9633f993862c89539504a42 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 13 Jan 2011 15:36:09 -0700 Subject: dt/flattree: Return virtual address from early_init_dt_alloc_memory_arch() The physical address is never used by the device tree code when allocating memory for unflattening. Change the architecture's alloc hook to return the virutal address instead. Signed-off-by: Grant Likely --- arch/powerpc/kernel/prom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9e3132db718b..7185f0da7dc3 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -519,9 +519,9 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) memblock_add(base, size); } -u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) { - return memblock_alloc(size, align); + return __va(memblock_alloc(size, align)); } #ifdef CONFIG_BLK_DEV_INITRD -- cgit v1.2.2 From 4bca770ede796a1ef7af9c983166d5608d9ccfaf Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 17 Jan 2011 16:17:42 +1100 Subject: powerpc: perf: Fix frequency calculation for overflowing counters When profiling a benchmark that is almost 100% userspace, I noticed some wildly inaccurate profiles that showed almost all time spent in the kernel. Closer examination shows we were programming a tiny number of cycles into the PMU after each overflow (about ~200 away from the next overflow). This gets us stuck in a loop which we eventually break out of by throttling the PMU (there are regular throttle/unthrottle events in the log). It looks like we aren't setting event->hw.last_period to something same and the frequency to period calculations in perf are going haywire. With the following patch we find the correct period after a few interrupts and stay there. I also see no more throttle events. Signed-off-by: Anton Blanchard Acked-by: Benjamin Herrenschmidt Cc: linuxppc-dev@lists.ozlabs.org Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo LKML-Reference: <20110117161742.5feb3761@kryten> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 567480705789..ab6f6beadb57 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1212,6 +1212,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (left <= 0) left = period; record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x80000000LL) val = 0x80000000LL - left; -- cgit v1.2.2 From 8c8a9b25b5de3f1eeac721cf34f4379e56d5d694 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 18 Jan 2011 21:44:04 +1100 Subject: powerpc, perf: Fix frequency calculation for overflowing counters (FSL version) When fixing the frequency calculations for perf on powerpc I forgot to fix the FSL version. If we dont set event->hw.last_period the frequency to period calculations in perf go haywire and we continually throttle/unthrottle the PMU. Signed-off-by: Anton Blanchard Acked-by: Benjamin Herrenschmidt Cc: Scott Wood Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: linuxppc-dev@lists.ozlabs.org Cc: Peter Zijlstra LKML-Reference: <20110118214404.2f42e634@kryten> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event_fsl_emb.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 4dcf5f831e9d..b0dc8f7069cd 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -596,6 +596,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (left <= 0) left = period; record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x80000000LL) val = 0x80000000LL - left; -- cgit v1.2.2 From 06ca2188eccbd7932636ac5bde2837297800480e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 22 Dec 2010 16:42:56 +0000 Subject: powerpc/ppc32/tracing: Add stack frame to calls of trace_hardirqs_on/off 32-bit variant of the previous patch for 64-bit: << When an interrupt occurs in userspace, we can call trace_hardirqs_on/off() With one level stack. But if we have irqsoff tracing enabled, it checks both CALLER_ADDR0 and CALLER_ADDR1. The second call goes two stack frames up. If this is from user space, then there may not exist a second stack.... >> Signed-off-by: Steven Rostedt Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/entry_32.S | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index c22dc1ec1c94..56212bc0ab08 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -880,7 +880,18 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) */ andi. r10,r9,MSR_EE beq 1f + /* + * Since the ftrace irqsoff latency trace checks CALLER_ADDR1, + * which is the stack frame here, we need to force a stack frame + * in case we came from user space. + */ + stwu r1,-32(r1) + mflr r0 + stw r0,4(r1) + stwu r1,-32(r1) bl trace_hardirqs_on + lwz r1,0(r1) + lwz r1,0(r1) lwz r9,_MSR(r1) 1: #endif /* CONFIG_TRACE_IRQFLAGS */ -- cgit v1.2.2 From 4c4a5cf64bfa0ddbe4145e6ca18a62eb70706edd Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 24 Dec 2010 20:03:59 +0000 Subject: powerpc/rtas_flash: Use simple_read_from_buffer Simplify read file operation for /proc/powerpc/rtas/* interface by using simple_read_from_buffer. Signed-off-by: Akinobu Mita Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtas_flash.c | 53 +++++----------------------------------- 1 file changed, 6 insertions(+), 47 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 2b442e6c21e6..bf5f5ce3a7bd 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -256,31 +256,16 @@ static ssize_t rtas_flash_read(struct file *file, char __user *buf, struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); struct rtas_update_flash_t *uf; char msg[RTAS_MSG_MAXLEN]; - int msglen; - uf = (struct rtas_update_flash_t *) dp->data; + uf = dp->data; if (!strcmp(dp->name, FIRMWARE_FLASH_NAME)) { get_flash_status_msg(uf->status, msg); } else { /* FIRMWARE_UPDATE_NAME */ sprintf(msg, "%d\n", uf->status); } - msglen = strlen(msg); - if (msglen > count) - msglen = count; - - if (ppos && *ppos != 0) - return 0; /* be cheap */ - - if (!access_ok(VERIFY_WRITE, buf, msglen)) - return -EINVAL; - if (copy_to_user(buf, msg, msglen)) - return -EFAULT; - - if (ppos) - *ppos = msglen; - return msglen; + return simple_read_from_buffer(buf, count, ppos, msg, strlen(msg)); } /* constructor for flash_block_cache */ @@ -394,26 +379,13 @@ static ssize_t manage_flash_read(struct file *file, char __user *buf, char msg[RTAS_MSG_MAXLEN]; int msglen; - args_buf = (struct rtas_manage_flash_t *) dp->data; + args_buf = dp->data; if (args_buf == NULL) return 0; msglen = sprintf(msg, "%d\n", args_buf->status); - if (msglen > count) - msglen = count; - if (ppos && *ppos != 0) - return 0; /* be cheap */ - - if (!access_ok(VERIFY_WRITE, buf, msglen)) - return -EINVAL; - - if (copy_to_user(buf, msg, msglen)) - return -EFAULT; - - if (ppos) - *ppos = msglen; - return msglen; + return simple_read_from_buffer(buf, count, ppos, msg, msglen); } static ssize_t manage_flash_write(struct file *file, const char __user *buf, @@ -495,24 +467,11 @@ static ssize_t validate_flash_read(struct file *file, char __user *buf, char msg[RTAS_MSG_MAXLEN]; int msglen; - args_buf = (struct rtas_validate_flash_t *) dp->data; + args_buf = dp->data; - if (ppos && *ppos != 0) - return 0; /* be cheap */ - msglen = get_validate_flash_msg(args_buf, msg); - if (msglen > count) - msglen = count; - - if (!access_ok(VERIFY_WRITE, buf, msglen)) - return -EINVAL; - - if (copy_to_user(buf, msg, msglen)) - return -EFAULT; - if (ppos) - *ppos = msglen; - return msglen; + return simple_read_from_buffer(buf, count, ppos, msg, msglen); } static ssize_t validate_flash_write(struct file *file, const char __user *buf, -- cgit v1.2.2 From b18ae08deac23187e4a22a8c94a1a473be8e8c93 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jan 2011 03:49:25 +0000 Subject: powerpc/cell: Use system_wq in cpufreq_spudemand With cmwq, there's no reason to use a separate workqueue in cpufreq_spudemand. Use system_wq instead. The work items are already sync canceled on stop, so it's already guaranteed that no work is running when spu_gov_exit() is entered. Signed-off-by: Tejun Heo Cc: Arnd Bergmann Cc: linuxppc-dev@lists.ozlabs.org Cc: Dave Jones Cc: cpufreq@vger.kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 09e4dea4a85a..09d31dbf43f9 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -265,11 +265,26 @@ void accumulate_stolen_time(void) { u64 sst, ust; - sst = scan_dispatch_log(get_paca()->starttime_user); - ust = scan_dispatch_log(get_paca()->starttime); - get_paca()->system_time -= sst; - get_paca()->user_time -= ust; - get_paca()->stolen_time += ust + sst; + u8 save_soft_enabled = local_paca->soft_enabled; + u8 save_hard_enabled = local_paca->hard_enabled; + + /* We are called early in the exception entry, before + * soft/hard_enabled are sync'ed to the expected state + * for the exception. We are hard disabled but the PACA + * needs to reflect that so various debug stuff doesn't + * complain + */ + local_paca->soft_enabled = 0; + local_paca->hard_enabled = 0; + + sst = scan_dispatch_log(local_paca->starttime_user); + ust = scan_dispatch_log(local_paca->starttime); + local_paca->system_time -= sst; + local_paca->user_time -= ust; + local_paca->stolen_time += ust + sst; + + local_paca->soft_enabled = save_soft_enabled; + local_paca->hard_enabled = save_hard_enabled; } static inline u64 calculate_stolen_time(u64 stop_tb) -- cgit v1.2.2 From 619b267724e8cc41072d650ae28114851dd8bf56 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 6 Jan 2011 17:54:58 +0000 Subject: powerpc/kexec: Remove ppc_md.machine_kexec_cleanup No one uses ppc_md.machine_kexec_cleanup, so remove it. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/machine_kexec.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index df7e20c191cd..fae67642499d 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -65,8 +65,6 @@ int machine_kexec_prepare(struct kimage *image) void machine_kexec_cleanup(struct kimage *image) { - if (ppc_md.machine_kexec_cleanup) - ppc_md.machine_kexec_cleanup(image); } void arch_crash_save_vmcoreinfo(void) -- cgit v1.2.2 From c94868788cf26d5ad816646dd9af3cdf6fbd92fd Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 6 Jan 2011 17:55:36 +0000 Subject: powerpc/kexec: Remove ppc_md.machine_kexec No one uses ppc_md.machine_kexec, so remove it. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/machine_kexec.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index fae67642499d..43836aa8348e 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -85,10 +85,7 @@ void arch_crash_save_vmcoreinfo(void) */ void machine_kexec(struct kimage *image) { - if (ppc_md.machine_kexec) - ppc_md.machine_kexec(image); - else - default_machine_kexec(image); + default_machine_kexec(image); /* Fall back to normal restart if we're still alive. */ machine_restart(NULL); -- cgit v1.2.2 From c1f784e553490a2602567666fc79ff142cb4413b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 6 Jan 2011 17:56:09 +0000 Subject: powerpc/kdump: Remove ppc_md.machine_crash_shutdown No one uses ppc_md.machine_crash_shutdown, so remove it. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/machine_kexec.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 43836aa8348e..3427b2faa19a 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -44,10 +44,7 @@ void machine_kexec_mask_interrupts(void) { void machine_crash_shutdown(struct pt_regs *regs) { - if (ppc_md.machine_crash_shutdown) - ppc_md.machine_crash_shutdown(regs); - else - default_machine_crash_shutdown(regs); + default_machine_crash_shutdown(regs); } /* -- cgit v1.2.2 From 158d5b5e3654938cca963eec82cd10af49cda17f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 21 Jan 2011 13:43:59 +1100 Subject: powerpc/kdump: Move crash_kexec_stop_spus to kdump crash handler Use the crash handler hooks to run the SPU stop code, just like we do for ehea and cell RAS code. While I'm here I noticed "CPUSs reliabally" so fix the spelling MISTAKESs reliabally. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 72 ++------------------------------------------- 1 file changed, 2 insertions(+), 70 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 832c8c4db254..3d569e2aff18 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -48,7 +48,7 @@ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; cpumask_t cpus_in_sr = CPU_MASK_NONE; -#define CRASH_HANDLER_MAX 2 +#define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); @@ -125,7 +125,7 @@ static void crash_kexec_prepare_cpus(int cpu) smp_wmb(); /* - * FIXME: Until we will have the way to stop other CPUSs reliabally, + * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to * respond. * Delay of at least 10 seconds. @@ -254,72 +254,6 @@ void crash_kexec_secondary(struct pt_regs *regs) cpus_in_sr = CPU_MASK_NONE; } #endif -#ifdef CONFIG_SPU_BASE - -#include -#include - -struct crash_spu_info { - struct spu *spu; - u32 saved_spu_runcntl_RW; - u32 saved_spu_status_R; - u32 saved_spu_npc_RW; - u64 saved_mfc_sr1_RW; - u64 saved_mfc_dar; - u64 saved_mfc_dsisr; -}; - -#define CRASH_NUM_SPUS 16 /* Enough for current hardware */ -static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS]; - -static void crash_kexec_stop_spus(void) -{ - struct spu *spu; - int i; - u64 tmp; - - for (i = 0; i < CRASH_NUM_SPUS; i++) { - if (!crash_spu_info[i].spu) - continue; - - spu = crash_spu_info[i].spu; - - crash_spu_info[i].saved_spu_runcntl_RW = - in_be32(&spu->problem->spu_runcntl_RW); - crash_spu_info[i].saved_spu_status_R = - in_be32(&spu->problem->spu_status_R); - crash_spu_info[i].saved_spu_npc_RW = - in_be32(&spu->problem->spu_npc_RW); - - crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu); - crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu); - tmp = spu_mfc_sr1_get(spu); - crash_spu_info[i].saved_mfc_sr1_RW = tmp; - - tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; - spu_mfc_sr1_set(spu, tmp); - - __delay(200); - } -} - -void crash_register_spus(struct list_head *list) -{ - struct spu *spu; - - list_for_each_entry(spu, list, full_list) { - if (WARN_ON(spu->number >= CRASH_NUM_SPUS)) - continue; - - crash_spu_info[spu->number].spu = spu; - } -} - -#else -static inline void crash_kexec_stop_spus(void) -{ -} -#endif /* CONFIG_SPU_BASE */ /* * Register a function to be called on shutdown. Only use this if you @@ -439,8 +373,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crash_shutdown_cpu = -1; __debugger_fault_handler = old_handler; - crash_kexec_stop_spus(); - if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); } -- cgit v1.2.2 From ac4414e4d3024a3c9ac5f54a734ac77dd7edfdb3 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 6 Jan 2011 18:00:36 +0000 Subject: powerpc/kdump: Disable ftrace during kexec We should disable ftrace during kexec, some of the tracers are very invasive and we do not want them going off while doing the low level work of swapping one kernel out for another. This mirrors what we do on x86. Even though we cannot return from a kexec on powerpc (since we do not implement CONFIG_KEXEC_JUMP), add the restore code in case we do one day. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/machine_kexec.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 3427b2faa19a..49a170af8145 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -82,8 +83,14 @@ void arch_crash_save_vmcoreinfo(void) */ void machine_kexec(struct kimage *image) { + int save_ftrace_enabled; + + save_ftrace_enabled = __ftrace_enabled_save(); + default_machine_kexec(image); + __ftrace_enabled_restore(save_ftrace_enabled); + /* Fall back to normal restart if we're still alive. */ machine_restart(NULL); for(;;); -- cgit v1.2.2 From 7071854bb248926b85141d791f9fa17901a6fa4b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 11 Jan 2011 19:44:30 +0000 Subject: powerpc: Print 32 bits of DSISR in show_regs We were printing 64 bits of DSISR in show_regs even though it is 32 bit. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 84906d3fc860..7a1d5cb76932 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -631,7 +631,7 @@ void show_regs(struct pt_regs * regs) #ifdef CONFIG_PPC_ADV_DEBUG_REGS printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); #else - printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); + printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); #endif printk("TASK = %p[%d] '%s' THREAD: %p", current, task_pid_nr(current), current->comm, task_thread_info(current)); -- cgit v1.2.2 From a443506b8598bbd784cfe403ad1db2c7083ff465 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 11 Jan 2011 19:45:31 +0000 Subject: powerpc: Don't force MSR_RI in machine_check_exception We should never force MSR_RI on. If we take a machine check with MSR_RI off then we have no chance of recovering safely. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/traps.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1b2cdc8eec90..6865002df6ce 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -627,7 +627,6 @@ void machine_check_exception(struct pt_regs *regs) return; if (user_mode(regs)) { - regs->msr |= MSR_RI; _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); return; } @@ -643,10 +642,8 @@ void machine_check_exception(struct pt_regs *regs) return; #endif - if (debugger_fault_handler(regs)) { - regs->msr |= MSR_RI; + if (debugger_fault_handler(regs)) return; - } if (check_io_access(regs)) return; -- cgit v1.2.2 From dfb5509f8f49ffd52922060c830978ed5a9b9731 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 11 Jan 2011 19:47:20 +0000 Subject: powerpc: Remove duplicate debugger hook in machine_check_exception We are calling debugger_fault_handler twice in machine_check_exception. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/traps.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6865002df6ce..9e24224962d9 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -648,8 +648,6 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) return; - if (debugger_fault_handler(regs)) - return; die("Machine check", regs, SIGBUS); /* Must die if the interrupt is not recoverable */ -- cgit v1.2.2 From e49b1fae0ba4d06b29bd753a961abb447566bf4a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 11 Jan 2011 19:48:14 +0000 Subject: powerpc: Don't silently handle machine checks from userspace If a machine check comes from userspace we send a SIGBUS to the task and fail to printk anything. If we are taking machine checks due to bad hardware we want to know about it right away. Furthermore if we don't complain loudly then it will look a lot like a bug in the userspace application, potentially causing a lot of confusion. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/traps.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 9e24224962d9..bd74fac169be 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -626,11 +626,6 @@ void machine_check_exception(struct pt_regs *regs) if (recover > 0) return; - if (user_mode(regs)) { - _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); - return; - } - #if defined(CONFIG_8xx) && defined(CONFIG_PCI) /* the qspan pci read routines can cause machine checks -- Cort * -- cgit v1.2.2 From 7f32c9c60089bfdb5eeeaae6e9f59046db998234 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 11 Jan 2011 19:51:31 +0000 Subject: powerpc: Check RTAS extended log flag before checking length The spec suggests we should first check the extended log flag before checking the length field. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtasd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 0438f819fe6b..049dbecb5dbc 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -160,7 +160,7 @@ static int log_rtas_len(char * buf) /* rtas fixed header */ len = 8; err = (struct rtas_error_log *)buf; - if (err->extended_log_length) { + if (err->extended && err->extended_log_length) { /* extended header */ len += err->extended_log_length; -- cgit v1.2.2 From fbe754ca3a7579131ecec47c4fa10c40994b3804 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 11 Jan 2011 19:52:31 +0000 Subject: powerpc: machine_check_generic is wrong on 64bit Decoding machine checks is CPU specific and so machine_check_generic doesn't do the right thing on 64bit chips. Luckily we never call into this code because we call ppc_md.machine_check_exception instead if available. Since we check cur_cpu_spec->machine_check before calling it, we may as well remove machine_check_generic from 64bit archs. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cputable.c | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index be5ab18b03b5..8d74a24c5502 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -116,7 +116,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power3", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "power3", }, { /* Power3+ */ @@ -132,7 +131,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power3", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "power3", }, { /* Northstar */ @@ -148,7 +146,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/rs64", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "rs64", }, { /* Pulsar */ @@ -164,7 +161,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/rs64", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "rs64", }, { /* I-star */ @@ -180,7 +176,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/rs64", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "rs64", }, { /* S-star */ @@ -196,7 +191,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/rs64", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "rs64", }, { /* Power4 */ @@ -212,7 +206,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power4", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "power4", }, { /* Power4+ */ @@ -228,7 +221,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power4", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "power4", }, { /* PPC970 */ @@ -247,7 +239,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "ppc970", }, { /* PPC970FX */ @@ -266,7 +257,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "ppc970", }, { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */ @@ -285,7 +275,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970MP", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "ppc970", }, { /* PPC970MP */ @@ -304,7 +293,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970MP", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "ppc970", }, { /* PPC970GX */ @@ -322,7 +310,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "ppc970", }, { /* Power5 GR */ @@ -343,7 +330,6 @@ static struct cpu_spec __initdata cpu_specs[] = { */ .oprofile_mmcra_sihv = MMCRA_SIHV, .oprofile_mmcra_sipr = MMCRA_SIPR, - .machine_check = machine_check_generic, .platform = "power5", }, { /* Power5++ */ @@ -360,7 +346,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .oprofile_mmcra_sihv = MMCRA_SIHV, .oprofile_mmcra_sipr = MMCRA_SIPR, - .machine_check = machine_check_generic, .platform = "power5+", }, { /* Power5 GS */ @@ -378,7 +363,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .oprofile_mmcra_sihv = MMCRA_SIHV, .oprofile_mmcra_sipr = MMCRA_SIPR, - .machine_check = machine_check_generic, .platform = "power5+", }, { /* POWER6 in P5+ mode; 2.04-compliant processor */ @@ -390,7 +374,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTR_HPTE_TABLE, .icache_bsize = 128, .dcache_bsize = 128, - .machine_check = machine_check_generic, .oprofile_cpu_type = "ppc64/ibm-compat-v1", .oprofile_type = PPC_OPROFILE_POWER4, .platform = "power5+", @@ -413,7 +396,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR, .oprofile_mmcra_clear = POWER6_MMCRA_THRM | POWER6_MMCRA_OTHER, - .machine_check = machine_check_generic, .platform = "power6x", }, { /* 2.05-compliant processor, i.e. Power6 "architected" mode */ @@ -425,7 +407,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTR_HPTE_TABLE, .icache_bsize = 128, .dcache_bsize = 128, - .machine_check = machine_check_generic, .oprofile_cpu_type = "ppc64/ibm-compat-v1", .oprofile_type = PPC_OPROFILE_POWER4, .platform = "power6", @@ -440,7 +421,6 @@ static struct cpu_spec __initdata cpu_specs[] = { MMU_FTR_TLBIE_206, .icache_bsize = 128, .dcache_bsize = 128, - .machine_check = machine_check_generic, .oprofile_type = PPC_OPROFILE_POWER4, .oprofile_cpu_type = "ppc64/ibm-compat-v1", .platform = "power7", @@ -492,7 +472,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/cell-be", .oprofile_type = PPC_OPROFILE_CELL, - .machine_check = machine_check_generic, .platform = "ppc-cell-be", }, { /* PA Semi PA6T */ @@ -510,7 +489,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_pa6t, .oprofile_cpu_type = "ppc64/pa6t", .oprofile_type = PPC_OPROFILE_PA6T, - .machine_check = machine_check_generic, .platform = "pa6t", }, { /* default match */ @@ -524,7 +502,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .machine_check = machine_check_generic, .platform = "power4", } #endif /* CONFIG_PPC_BOOK3S_64 */ -- cgit v1.2.2 From 19df0c2fef010e94e90df514aaf4e73f6b80145c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 25 Jan 2011 14:26:50 +0100 Subject: percpu: align percpu readmostly subsection to cacheline Currently percpu readmostly subsection may share cachelines with other percpu subsections which may result in unnecessary cacheline bounce and performance degradation. This patch adds @cacheline parameter to PERCPU() and PERCPU_VADDR() linker macros, makes each arch linker scripts specify its cacheline size and use it to align percpu subsections. This is based on Shaohua's x86 only patch. Signed-off-by: Tejun Heo Cc: Shaohua Li --- arch/powerpc/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8a0deefac08d..b9150f07d266 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -160,7 +160,7 @@ SECTIONS INIT_RAM_FS } - PERCPU(PAGE_SIZE) + PERCPU(L1_CACHE_BYTES, PAGE_SIZE) . = ALIGN(8); .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { -- cgit v1.2.2 From c48d0dbaac7f27c083430170c66194d6a523bc2a Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 26 Jan 2011 06:17:58 +0000 Subject: powerpc/476: define specific cpu table entry DD2 core The DD2 core still has some unstability. Define CPU_FTR_476_DD2 to enable workarounds in later patches. This is based on an earlier, unreleased patch for DD1 by Ben Herrenschmidt. Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cputable.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index be5ab18b03b5..bf7cf86e04dd 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1834,11 +1834,11 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, - { /* 476 core */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x11a50000, + { /* 476 DD2 core */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x11a52080, .cpu_name = "476", - .cpu_features = CPU_FTRS_47X, + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, .mmu_features = MMU_FTR_TYPE_47x | @@ -1862,6 +1862,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, + { /* 476 others */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x11a50000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, -- cgit v1.2.2 From 04bea68b2f0eeebb089ecc67b618795925268b4a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 24 Jan 2011 09:58:55 +0530 Subject: of/pci: move of_irq_map_pci() into generic code There is a tiny difference between PPC32 and PPC64. Microblaze uses the PPC32 variant. Signed-off-by: Sebastian Andrzej Siewior [grant.likely@secretlab.ca: Added comment to #endif, moved documentation block to function implementation, fixed for non ppc and microblaze compiles] Signed-off-by: Grant Likely --- arch/powerpc/kernel/pci-common.c | 1 + arch/powerpc/kernel/prom_parse.c | 84 ---------------------------------------- 2 files changed, 1 insertion(+), 84 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 10a44e68ef11..eb341be9a4d9 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c index c2b7a07cc3d3..47187cc2cf00 100644 --- a/arch/powerpc/kernel/prom_parse.c +++ b/arch/powerpc/kernel/prom_parse.c @@ -2,95 +2,11 @@ #include #include -#include #include #include #include #include #include -#include - -#ifdef CONFIG_PCI -int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq) -{ - struct device_node *dn, *ppnode; - struct pci_dev *ppdev; - u32 lspec; - u32 laddr[3]; - u8 pin; - int rc; - - /* Check if we have a device node, if yes, fallback to standard OF - * parsing - */ - dn = pci_device_to_OF_node(pdev); - if (dn) { - rc = of_irq_map_one(dn, 0, out_irq); - if (!rc) - return rc; - } - - /* Ok, we don't, time to have fun. Let's start by building up an - * interrupt spec. we assume #interrupt-cells is 1, which is standard - * for PCI. If you do different, then don't use that routine. - */ - rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin); - if (rc != 0) - return rc; - /* No pin, exit */ - if (pin == 0) - return -ENODEV; - - /* Now we walk up the PCI tree */ - lspec = pin; - for (;;) { - /* Get the pci_dev of our parent */ - ppdev = pdev->bus->self; - - /* Ouch, it's a host bridge... */ - if (ppdev == NULL) { -#ifdef CONFIG_PPC64 - ppnode = pci_bus_to_OF_node(pdev->bus); -#else - struct pci_controller *host; - host = pci_bus_to_host(pdev->bus); - ppnode = host ? host->dn : NULL; -#endif - /* No node for host bridge ? give up */ - if (ppnode == NULL) - return -EINVAL; - } else - /* We found a P2P bridge, check if it has a node */ - ppnode = pci_device_to_OF_node(ppdev); - - /* Ok, we have found a parent with a device-node, hand over to - * the OF parsing code. - * We build a unit address from the linux device to be used for - * resolution. Note that we use the linux bus number which may - * not match your firmware bus numbering. - * Fortunately, in most cases, interrupt-map-mask doesn't include - * the bus number as part of the matching. - * You should still be careful about that though if you intend - * to rely on this function (you ship a firmware that doesn't - * create device nodes for all PCI devices). - */ - if (ppnode) - break; - - /* We can only get here if we hit a P2P bridge with no node, - * let's do standard swizzling and try again - */ - lspec = pci_swizzle_interrupt_pin(pdev, lspec); - pdev = ppdev; - } - - laddr[0] = (pdev->bus->number << 16) - | (pdev->devfn << 8); - laddr[1] = laddr[2] = 0; - return of_irq_map_raw(ppnode, &lspec, 1, laddr, out_irq); -} -EXPORT_SYMBOL_GPL(of_irq_map_pci); -#endif /* CONFIG_PCI */ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, unsigned long *busno, unsigned long *phys, unsigned long *size) -- cgit v1.2.2 From b5d937de0367d26f65b9af1aef5f2c34c1939be0 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 4 Feb 2011 11:24:11 -0700 Subject: powerpc/pci: Make both ppc32 and ppc64 use sysdata for pci_controller Currently, ppc32 uses sysdata for the pci_controller pointer, and ppc64 uses it to hold the device_node pointer. This patch moves the of_node pointer into (struct pci_bus*)->dev.of_node and (struct pci_dev*)->dev.of_node so that sysdata can be converted to always use the pci_controller pointer instead. It also fixes up the allocating of pci devices so that the of_node pointer gets assigned consistently and increments the ref count. Signed-off-by: Grant Likely --- arch/powerpc/kernel/of_platform.c | 2 +- arch/powerpc/kernel/pci-common.c | 11 +++-------- arch/powerpc/kernel/pci_32.c | 2 +- arch/powerpc/kernel/pci_64.c | 6 +++--- arch/powerpc/kernel/pci_dn.c | 9 ++++++--- arch/powerpc/kernel/pci_of_scan.c | 4 ++-- 6 files changed, 16 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index b2c363ef38ad..9bd951c67767 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -74,7 +74,7 @@ static int __devinit of_pci_phb_probe(struct platform_device *dev, #endif /* CONFIG_EEH */ /* Scan the bus */ - pcibios_scan_phb(phb, dev->dev.of_node); + pcibios_scan_phb(phb); if (phb->bus == NULL) return -ENXIO; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index eb341be9a4d9..3cd85faa8ac6 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1688,13 +1688,8 @@ int early_find_capability(struct pci_controller *hose, int bus, int devfn, /** * pci_scan_phb - Given a pci_controller, setup and scan the PCI bus * @hose: Pointer to the PCI host controller instance structure - * @sysdata: value to use for sysdata pointer. ppc32 and ppc64 differ here - * - * Note: the 'data' pointer is a temporary measure. As 32 and 64 bit - * pci code gets merged, this parameter should become unnecessary because - * both will use the same value. */ -void __devinit pcibios_scan_phb(struct pci_controller *hose, void *sysdata) +void __devinit pcibios_scan_phb(struct pci_controller *hose) { struct pci_bus *bus; struct device_node *node = hose->dn; @@ -1704,13 +1699,13 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose, void *sysdata) node ? node->full_name : ""); /* Create an empty bus for the toplevel */ - bus = pci_create_bus(hose->parent, hose->first_busno, hose->ops, - sysdata); + bus = pci_create_bus(hose->parent, hose->first_busno, hose->ops, hose); if (bus == NULL) { pr_err("Failed to create bus for PCI domain %04x\n", hose->global_number); return; } + bus->dev.of_node = of_node_get(node); bus->secondary = hose->first_busno; hose->bus = bus; diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index e7db5b48004a..bedb370459f2 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -381,7 +381,7 @@ static int __init pcibios_init(void) if (pci_assign_all_buses) hose->first_busno = next_busno; hose->last_busno = 0xff; - pcibios_scan_phb(hose, hose); + pcibios_scan_phb(hose); pci_bus_add_devices(hose->bus); if (pci_assign_all_buses || next_busno <= hose->last_busno) next_busno = hose->last_busno + pcibios_assign_bus_offset; diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 851577608a78..fc6452b6be9f 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -64,7 +64,7 @@ static int __init pcibios_init(void) /* Scan all of the recorded PCI controllers. */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - pcibios_scan_phb(hose, hose->dn); + pcibios_scan_phb(hose); pci_bus_add_devices(hose->bus); } @@ -242,10 +242,10 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, break; bus = NULL; } - if (bus == NULL || bus->sysdata == NULL) + if (bus == NULL || bus->dev.of_node == NULL) return -ENODEV; - hose_node = (struct device_node *)bus->sysdata; + hose_node = bus->dev.of_node; hose = PCI_DN(hose_node)->phb; switch (which) { diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index d56b35ee7f74..29852688ceaa 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -161,7 +161,7 @@ static void *is_devfn_node(struct device_node *dn, void *data) /* * This is the "slow" path for looking up a device_node from a * pci_dev. It will hunt for the device under its parent's - * phb and then update sysdata for a future fastpath. + * phb and then update of_node pointer. * * It may also do fixups on the actual device since this happens * on the first read/write. @@ -170,16 +170,19 @@ static void *is_devfn_node(struct device_node *dn, void *data) * In this case it may probe for real hardware ("just in case") * and add a device_node to the device tree if necessary. * + * Is this function necessary anymore now that dev->dev.of_node is + * used to store the node pointer? + * */ struct device_node *fetch_dev_dn(struct pci_dev *dev) { - struct device_node *orig_dn = dev->sysdata; + struct device_node *orig_dn = dev->dev.of_node; struct device_node *dn; unsigned long searchval = (dev->bus->number << 8) | dev->devfn; dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval); if (dn) - dev->sysdata = dn; + dev->dev.of_node = dn; return dn; } EXPORT_SYMBOL(fetch_dev_dn); diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index e751506323b4..1e89a72fd030 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -135,7 +135,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, pr_debug(" create device, devfn: %x, type: %s\n", devfn, type); dev->bus = bus; - dev->sysdata = node; + dev->dev.of_node = of_node_get(node); dev->dev.parent = bus->bridge; dev->dev.bus = &pci_bus_type; dev->devfn = devfn; @@ -238,7 +238,7 @@ void __devinit of_scan_pci_bridge(struct device_node *node, bus->primary = dev->bus->number; bus->subordinate = busrange[1]; bus->bridge_ctl = 0; - bus->sysdata = node; + bus->dev.of_node = of_node_get(node); /* parse ranges property */ /* PCI #address-cells == 3 and #size-cells == 2 always */ -- cgit v1.2.2 From af9eef3c7b1ed004c378c89b87642f4937337d50 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 20 Jan 2011 20:36:03 +0000 Subject: powerpc: Pass the right cpu_spec to ->setup_cpu() on 64-bit When calling setup_cpu() on 64-bit, we pass a pointer to the cputable entry we have found. This used to be fine when cur_cpu_spec was a pointer to that entry, but nowadays, we copy the entry into a separate variable, and we do so before we call the setup_cpu() callback. That means that any attempt by that callback at patching the CPU table entry (to adjust CPU features for example) will patch the wrong table. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cputable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 8d74a24c5502..e8e915ce3d8d 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2076,8 +2076,8 @@ static void __init setup_cpu_spec(unsigned long offset, struct cpu_spec *s) * pointer on ppc64 and booke as we are running at 0 in real mode * on ppc64 and reloc_offset is always 0 on booke. */ - if (s->cpu_setup) { - s->cpu_setup(offset, s); + if (t->cpu_setup) { + t->cpu_setup(offset, t); } #endif /* CONFIG_PPC64 || CONFIG_BOOKE */ } -- cgit v1.2.2 From 1f1936ff3febf38d582177ea319eaa278f32c91f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 20 Jan 2011 20:35:23 +0000 Subject: powerpc: Fix some 6xx/7xxx CPU setup functions Some of those functions try to adjust the CPU features, for example to remove NAP support on some revisions. However, they seem to use r5 as an index into the CPU table entry, which might have been right a long time ago but no longer is. r4 is the right register to use. This probably caused some off behaviours on some PowerMac variants using 750cx or 7455 processor revisions. Signed-off-by: Benjamin Herrenschmidt CC: stable@kernel.org --- arch/powerpc/kernel/cpu_setup_6xx.S | 40 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index 55cba4a8a959..f8cd9fba4d35 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -18,7 +18,7 @@ #include _GLOBAL(__setup_cpu_603) - mflr r4 + mflr r5 BEGIN_MMU_FTR_SECTION li r10,0 mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */ @@ -27,60 +27,60 @@ BEGIN_FTR_SECTION bl __init_fpu_registers END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE) bl setup_common_caches - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_604) - mflr r4 + mflr r5 bl setup_common_caches bl setup_604_hid0 - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_750) - mflr r4 + mflr r5 bl __init_fpu_registers bl setup_common_caches bl setup_750_7400_hid0 - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_750cx) - mflr r4 + mflr r5 bl __init_fpu_registers bl setup_common_caches bl setup_750_7400_hid0 bl setup_750cx - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_750fx) - mflr r4 + mflr r5 bl __init_fpu_registers bl setup_common_caches bl setup_750_7400_hid0 bl setup_750fx - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_7400) - mflr r4 + mflr r5 bl __init_fpu_registers bl setup_7400_workarounds bl setup_common_caches bl setup_750_7400_hid0 - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_7410) - mflr r4 + mflr r5 bl __init_fpu_registers bl setup_7410_workarounds bl setup_common_caches bl setup_750_7400_hid0 li r3,0 mtspr SPRN_L2CR2,r3 - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_745x) - mflr r4 + mflr r5 bl setup_common_caches bl setup_745x_specifics - mtlr r4 + mtlr r5 blr /* Enable caches for 603's, 604, 750 & 7400 */ @@ -194,10 +194,10 @@ setup_750cx: cror 4*cr0+eq,4*cr0+eq,4*cr1+eq cror 4*cr0+eq,4*cr0+eq,4*cr2+eq bnelr - lwz r6,CPU_SPEC_FEATURES(r5) + lwz r6,CPU_SPEC_FEATURES(r4) li r7,CPU_FTR_CAN_NAP andc r6,r6,r7 - stw r6,CPU_SPEC_FEATURES(r5) + stw r6,CPU_SPEC_FEATURES(r4) blr /* 750fx specific @@ -225,12 +225,12 @@ BEGIN_FTR_SECTION andis. r11,r11,L3CR_L3E@h beq 1f END_FTR_SECTION_IFSET(CPU_FTR_L3CR) - lwz r6,CPU_SPEC_FEATURES(r5) + lwz r6,CPU_SPEC_FEATURES(r4) andi. r0,r6,CPU_FTR_L3_DISABLE_NAP beq 1f li r7,CPU_FTR_CAN_NAP andc r6,r6,r7 - stw r6,CPU_SPEC_FEATURES(r5) + stw r6,CPU_SPEC_FEATURES(r4) 1: mfspr r11,SPRN_HID0 -- cgit v1.2.2 From 710ac54be44e0cc53f5bf29b03d12c8706e7077a Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 17 Feb 2011 00:26:57 -0700 Subject: dt/powerpc: move of_bus_type infrastructure to ibmebus arch/powerpc/kernel/ibmebus.c is the only remaining user of the of_bus_type support code for initializing the bus and registering drivers. All others have either been switched to the vanilla platform bus or already have their own infrastructure. This patch moves the functionality that ibmebus is using out of drivers/of/{platform,device}.c and into ibmebus.c where it is actually used. Also renames the moved symbols from of_platform_* to ibmebus_bus_* to reflect the actual usage. This patch is part of moving all of the of_platform_bus_type users over to the platform_bus_type. Signed-off-by: Grant Likely --- arch/powerpc/kernel/ibmebus.c | 404 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 400 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index f62efdfd1769..c00d4ca1ee15 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -201,13 +201,14 @@ int ibmebus_register_driver(struct of_platform_driver *drv) /* If the driver uses devices that ibmebus doesn't know, add them */ ibmebus_create_devices(drv->driver.of_match_table); - return of_register_driver(drv, &ibmebus_bus_type); + drv->driver.bus = &ibmebus_bus_type; + return driver_register(&drv->driver); } EXPORT_SYMBOL(ibmebus_register_driver); void ibmebus_unregister_driver(struct of_platform_driver *drv) { - of_unregister_driver(drv); + driver_unregister(&drv->driver); } EXPORT_SYMBOL(ibmebus_unregister_driver); @@ -308,15 +309,410 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus, } } + static struct bus_attribute ibmebus_bus_attrs[] = { __ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe), __ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove), __ATTR_NULL }; +static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv) +{ + const struct of_device_id *matches = drv->of_match_table; + + if (!matches) + return 0; + + return of_match_device(matches, dev) != NULL; +} + +static int ibmebus_bus_device_probe(struct device *dev) +{ + int error = -ENODEV; + struct of_platform_driver *drv; + struct platform_device *of_dev; + const struct of_device_id *match; + + drv = to_of_platform_driver(dev->driver); + of_dev = to_platform_device(dev); + + if (!drv->probe) + return error; + + of_dev_get(of_dev); + + match = of_match_device(drv->driver.of_match_table, dev); + if (match) + error = drv->probe(of_dev, match); + if (error) + of_dev_put(of_dev); + + return error; +} + +static int ibmebus_bus_device_remove(struct device *dev) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + + if (dev->driver && drv->remove) + drv->remove(of_dev); + return 0; +} + +static void ibmebus_bus_device_shutdown(struct device *dev) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + + if (dev->driver && drv->shutdown) + drv->shutdown(of_dev); +} + +/* + * ibmebus_bus_device_attrs + */ +static ssize_t devspec_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ofdev; + + ofdev = to_platform_device(dev); + return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name); +} + +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ofdev; + + ofdev = to_platform_device(dev); + return sprintf(buf, "%s\n", ofdev->dev.of_node->name); +} + +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t len = of_device_get_modalias(dev, buf, PAGE_SIZE - 2); + buf[len] = '\n'; + buf[len+1] = 0; + return len+1; +} + +struct device_attribute ibmebus_bus_device_attrs[] = { + __ATTR_RO(devspec), + __ATTR_RO(name), + __ATTR_RO(modalias), + __ATTR_NULL +}; + +#ifdef CONFIG_PM_SLEEP +static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + int ret = 0; + + if (dev->driver && drv->suspend) + ret = drv->suspend(of_dev, mesg); + return ret; +} + +static int ibmebus_bus_legacy_resume(struct device *dev) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + int ret = 0; + + if (dev->driver && drv->resume) + ret = drv->resume(of_dev); + return ret; +} + +static int ibmebus_bus_pm_prepare(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm && drv->pm->prepare) + ret = drv->pm->prepare(dev); + + return ret; +} + +static void ibmebus_bus_pm_complete(struct device *dev) +{ + struct device_driver *drv = dev->driver; + + if (drv && drv->pm && drv->pm->complete) + drv->pm->complete(dev); +} + +#ifdef CONFIG_SUSPEND + +static int ibmebus_bus_pm_suspend(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->suspend) + ret = drv->pm->suspend(dev); + } else { + ret = ibmebus_bus_legacy_suspend(dev, PMSG_SUSPEND); + } + + return ret; +} + +static int ibmebus_bus_pm_suspend_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->suspend_noirq) + ret = drv->pm->suspend_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_resume(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->resume) + ret = drv->pm->resume(dev); + } else { + ret = ibmebus_bus_legacy_resume(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_resume_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->resume_noirq) + ret = drv->pm->resume_noirq(dev); + } + + return ret; +} + +#else /* !CONFIG_SUSPEND */ + +#define ibmebus_bus_pm_suspend NULL +#define ibmebus_bus_pm_resume NULL +#define ibmebus_bus_pm_suspend_noirq NULL +#define ibmebus_bus_pm_resume_noirq NULL + +#endif /* !CONFIG_SUSPEND */ + +#ifdef CONFIG_HIBERNATION + +static int ibmebus_bus_pm_freeze(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->freeze) + ret = drv->pm->freeze(dev); + } else { + ret = ibmebus_bus_legacy_suspend(dev, PMSG_FREEZE); + } + + return ret; +} + +static int ibmebus_bus_pm_freeze_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->freeze_noirq) + ret = drv->pm->freeze_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_thaw(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->thaw) + ret = drv->pm->thaw(dev); + } else { + ret = ibmebus_bus_legacy_resume(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_thaw_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->thaw_noirq) + ret = drv->pm->thaw_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_poweroff(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->poweroff) + ret = drv->pm->poweroff(dev); + } else { + ret = ibmebus_bus_legacy_suspend(dev, PMSG_HIBERNATE); + } + + return ret; +} + +static int ibmebus_bus_pm_poweroff_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->poweroff_noirq) + ret = drv->pm->poweroff_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_restore(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->restore) + ret = drv->pm->restore(dev); + } else { + ret = ibmebus_bus_legacy_resume(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_restore_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->restore_noirq) + ret = drv->pm->restore_noirq(dev); + } + + return ret; +} + +#else /* !CONFIG_HIBERNATION */ + +#define ibmebus_bus_pm_freeze NULL +#define ibmebus_bus_pm_thaw NULL +#define ibmebus_bus_pm_poweroff NULL +#define ibmebus_bus_pm_restore NULL +#define ibmebus_bus_pm_freeze_noirq NULL +#define ibmebus_bus_pm_thaw_noirq NULL +#define ibmebus_bus_pm_poweroff_noirq NULL +#define ibmebus_bus_pm_restore_noirq NULL + +#endif /* !CONFIG_HIBERNATION */ + +static struct dev_pm_ops ibmebus_bus_dev_pm_ops = { + .prepare = ibmebus_bus_pm_prepare, + .complete = ibmebus_bus_pm_complete, + .suspend = ibmebus_bus_pm_suspend, + .resume = ibmebus_bus_pm_resume, + .freeze = ibmebus_bus_pm_freeze, + .thaw = ibmebus_bus_pm_thaw, + .poweroff = ibmebus_bus_pm_poweroff, + .restore = ibmebus_bus_pm_restore, + .suspend_noirq = ibmebus_bus_pm_suspend_noirq, + .resume_noirq = ibmebus_bus_pm_resume_noirq, + .freeze_noirq = ibmebus_bus_pm_freeze_noirq, + .thaw_noirq = ibmebus_bus_pm_thaw_noirq, + .poweroff_noirq = ibmebus_bus_pm_poweroff_noirq, + .restore_noirq = ibmebus_bus_pm_restore_noirq, +}; + +#define IBMEBUS_BUS_PM_OPS_PTR (&ibmebus_bus_dev_pm_ops) + +#else /* !CONFIG_PM_SLEEP */ + +#define IBMEBUS_BUS_PM_OPS_PTR NULL + +#endif /* !CONFIG_PM_SLEEP */ + struct bus_type ibmebus_bus_type = { + .name = "ibmebus", .uevent = of_device_uevent, - .bus_attrs = ibmebus_bus_attrs + .bus_attrs = ibmebus_bus_attrs, + .match = ibmebus_bus_bus_match, + .probe = ibmebus_bus_device_probe, + .remove = ibmebus_bus_device_remove, + .shutdown = ibmebus_bus_device_shutdown, + .dev_attrs = ibmebus_bus_device_attrs, + .pm = IBMEBUS_BUS_PM_OPS_PTR, }; EXPORT_SYMBOL(ibmebus_bus_type); @@ -326,7 +722,7 @@ static int __init ibmebus_bus_init(void) printk(KERN_INFO "IBM eBus Device Driver\n"); - err = of_bus_type_init(&ibmebus_bus_type, "ibmebus"); + err = bus_register(&ibmebus_bus_type); if (err) { printk(KERN_ERR "%s: failed to register IBM eBus.\n", __func__); -- cgit v1.2.2 From 000061245a6797d542854106463b6b20fbdcb12e Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 22 Feb 2011 19:59:54 -0700 Subject: dt/powerpc: Eliminate users of of_platform_{,un}register_driver Get rid of old users of of_platform_driver in arch/powerpc. Most of_platform_driver users can be converted to use the platform_bus directly. Signed-off-by: Grant Likely --- arch/powerpc/kernel/of_platform.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 9bd951c67767..24582181b6ec 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -36,8 +36,7 @@ * lacking some bits needed here. */ -static int __devinit of_pci_phb_probe(struct platform_device *dev, - const struct of_device_id *match) +static int __devinit of_pci_phb_probe(struct platform_device *dev) { struct pci_controller *phb; @@ -104,7 +103,7 @@ static struct of_device_id of_pci_phb_ids[] = { {} }; -static struct of_platform_driver of_pci_phb_driver = { +static struct platform_driver of_pci_phb_driver = { .probe = of_pci_phb_probe, .driver = { .name = "of-pci", @@ -115,7 +114,7 @@ static struct of_platform_driver of_pci_phb_driver = { static __init int of_pci_phb_init(void) { - return of_register_platform_driver(&of_pci_phb_driver); + return platform_driver_register(&of_pci_phb_driver); } device_initcall(of_pci_phb_init); -- cgit v1.2.2 From 357574c48260c9177803a23b881d0af535670ec0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 23 Feb 2011 12:46:16 +0000 Subject: powerpc/kexec: Restore ppc_md.machine_kexec Kyle Moffett points out that mpc85xx has started using the ppc_md.machine_kexec hook. As such, revert patch c94868788cf2 (powerpc/kexec: Remove ppc_md.machine_kexec). Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/machine_kexec.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 49a170af8145..a5f8672eeff3 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -87,7 +87,10 @@ void machine_kexec(struct kimage *image) save_ftrace_enabled = __ftrace_enabled_save(); - default_machine_kexec(image); + if (ppc_md.machine_kexec) + ppc_md.machine_kexec(image); + else + default_machine_kexec(image); __ftrace_enabled_restore(save_ftrace_enabled); -- cgit v1.2.2 From e0780b720f75487911e0174ec3dec2da49f7bbfa Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Thu, 10 Feb 2011 04:44:35 +0000 Subject: powerpc: Fix call to flush_ptrace_hw_breakpoint() Fix the error in spelling the config option for hw-breakpoints and fix the build issue that follows. Signed-off by: K.Prasad Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7a1d5cb76932..8303a6c65ef7 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -353,6 +353,7 @@ static void switch_booke_debug_regs(struct thread_struct *new_thread) prime_debug_regs(new_thread); } #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ +#ifndef CONFIG_HAVE_HW_BREAKPOINT static void set_debug_reg_defaults(struct thread_struct *thread) { if (thread->dabr) { @@ -360,6 +361,7 @@ static void set_debug_reg_defaults(struct thread_struct *thread) set_dabr(0); } } +#endif /* !CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ int set_dabr(unsigned long dabr) @@ -670,11 +672,11 @@ void flush_thread(void) { discard_lazy_cpu_state(); -#ifdef CONFIG_HAVE_HW_BREAKPOINTS +#ifdef CONFIG_HAVE_HW_BREAKPOINT flush_ptrace_hw_breakpoint(current); -#else /* CONFIG_HAVE_HW_BREAKPOINTS */ +#else /* CONFIG_HAVE_HW_BREAKPOINT */ set_debug_reg_defaults(¤t->thread); -#endif /* CONFIG_HAVE_HW_BREAKPOINTS */ +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ } void -- cgit v1.2.2 From 089fb442f3018a3ed094f8ac7a7cc2d3bd03b114 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 21 Jan 2011 06:12:28 +0000 Subject: powerpc: Use ARCH_IRQ_INIT_FLAGS Define the ARCH_IRQ_INIT_FLAGS instead of fixing it up in a loop. Signed-off-by: Thomas Gleixner Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ce557f6f00fc..0531ccda8005 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -1074,21 +1074,6 @@ void irq_free_virt(unsigned int virq, unsigned int count) int arch_early_irq_init(void) { - struct irq_desc *desc; - int i; - - for (i = 0; i < NR_IRQS; i++) { - desc = irq_to_desc(i); - if (desc) - desc->status |= IRQ_NOREQUEST; - } - - return 0; -} - -int arch_init_chip_data(struct irq_desc *desc, int node) -{ - desc->status |= IRQ_NOREQUEST; return 0; } -- cgit v1.2.2 From a9d8946b4af9e003c645ffb46489fdeb154e7ed9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 21 Jan 2011 06:12:30 +0000 Subject: powerpc: Use new irq allocator Use the new functions and free the descriptor when the virq is destroyed. Signed-off-by: Thomas Gleixner Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0531ccda8005..8a958ca26ac2 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -678,16 +678,15 @@ void irq_set_virq_count(unsigned int count) static int irq_setup_virq(struct irq_host *host, unsigned int virq, irq_hw_number_t hwirq) { - struct irq_desc *desc; + int res; - desc = irq_to_desc_alloc_node(virq, 0); - if (!desc) { + res = irq_alloc_desc_at(virq, 0); + if (res != virq) { pr_debug("irq: -> allocating desc failed\n"); goto error; } - /* Clear IRQ_NOREQUEST flag */ - desc->status &= ~IRQ_NOREQUEST; + irq_clear_status_flags(virq, IRQ_NOREQUEST); /* map it */ smp_wmb(); @@ -696,11 +695,13 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq, if (host->ops->map(host, virq, hwirq)) { pr_debug("irq: -> mapping failed, freeing\n"); - goto error; + goto errdesc; } return 0; +errdesc: + irq_free_descs(virq, 1); error: irq_free_virt(virq, 1); return -1; @@ -879,9 +880,9 @@ void irq_dispose_mapping(unsigned int virq) smp_mb(); irq_map[virq].hwirq = host->inval_irq; - /* Set some flags */ - irq_to_desc(virq)->status |= IRQ_NOREQUEST; + irq_set_status_flags(virq, IRQ_NOREQUEST); + irq_free_descs(virq, 1); /* Free it */ irq_free_virt(virq, 1); } -- cgit v1.2.2 From 9ff0c61d08ac4defa5ad6c65935a67643b8f4ce3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Feb 2011 11:57:27 +0000 Subject: powerpc: Mask smp_processor_id() false positive The rtas_event_scan() function uses smp_processor_id() to select a starting point in cpu_online_mask, and does so under the protection of get_online_cpus(). This might not select the current processor in any case, so switch to raw_smp_processor_id(). Signed-off-by: Paul E. McKenney Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtasd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 049dbecb5dbc..7980ec0e1e1a 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -412,7 +412,8 @@ static void rtas_event_scan(struct work_struct *w) get_online_cpus(); - cpu = cpumask_next(smp_processor_id(), cpu_online_mask); + /* raw_ OK because just using CPU as starting point. */ + cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); if (cpu >= nr_cpu_ids) { cpu = cpumask_first(cpu_online_mask); -- cgit v1.2.2 From 6dd227002972be910c6191f38f8641e01796557f Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 27 Jan 2011 10:30:44 +0000 Subject: powerpc: Fix memory limits when starting at a non-zero address memblock_enforce_memory_limit() takes the desired maximum quantity of memory to end up with, not an address above which memory will not be used. Signed-off-by: Scott Wood Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7185f0da7dc3..05b7139d6a27 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -97,7 +97,7 @@ static void __init move_device_tree(void) start = __pa(initial_boot_params); size = be32_to_cpu(initial_boot_params->totalsize); - if ((memory_limit && (start + size) > memory_limit) || + if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) || overlaps_crashkernel(start, size)) { p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); -- cgit v1.2.2 From 0f4ac132365e56802cbe377313491aa84086371c Mon Sep 17 00:00:00 2001 From: Jim Keniston Date: Wed, 9 Feb 2011 12:43:13 +0000 Subject: powerpc/nvram: Generalize code for OS partitions in NVRAM Adapt the functions used to create and write to the RTAS-log partition to work with any OS-type partition. Signed-off-by: Jim Keniston Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/nvram_64.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index bb12b3248f13..bec1e930ed73 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -237,22 +237,45 @@ static unsigned char __init nvram_checksum(struct nvram_header *p) return c_sum; } +/* + * Per the criteria passed via nvram_remove_partition(), should this + * partition be removed? 1=remove, 0=keep + */ +static int nvram_can_remove_partition(struct nvram_partition *part, + const char *name, int sig, const char *exceptions[]) +{ + if (part->header.signature != sig) + return 0; + if (name) { + if (strncmp(name, part->header.name, 12)) + return 0; + } else if (exceptions) { + const char **except; + for (except = exceptions; *except; except++) { + if (!strncmp(*except, part->header.name, 12)) + return 0; + } + } + return 1; +} + /** * nvram_remove_partition - Remove one or more partitions in nvram * @name: name of the partition to remove, or NULL for a * signature only match * @sig: signature of the partition(s) to remove + * @exceptions: When removing all partitions with a matching signature, + * leave these alone. */ -int __init nvram_remove_partition(const char *name, int sig) +int __init nvram_remove_partition(const char *name, int sig, + const char *exceptions[]) { struct nvram_partition *part, *prev, *tmp; int rc; list_for_each_entry(part, &nvram_partitions, partition) { - if (part->header.signature != sig) - continue; - if (name && strncmp(name, part->header.name, 12)) + if (!nvram_can_remove_partition(part, name, sig, exceptions)) continue; /* Make partition a free partition */ -- cgit v1.2.2 From f2f6dad6ca3b06ae35a2e7b63f38158242c01531 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 6 Mar 2011 18:02:31 +0000 Subject: powerpc/iseries: Fix early init access to lppaca The combination of commit 8154c5d22d91cd16bd9985b0638c8957e4688d0e and 93c22703efa72c7527dbd586d1951c1f4a85fd70 Broke boot on iSeries. The problem is that iSeries very early boot code, which generates the device-tree and runs before our normal early initializations does need access the lppaca's very early, before the PACA array is initialized, and in fact even before the boot PACA has been initialized (it contains all 0's at this stage). However, the first patch above makes that code use the new llpaca_of(cpu) accessor, which itself is changed by the second patch to use the PACA array. We fix that by reverting iSeries to directly dereferencing the array. In addition, we fix all iterators in the iSeries code to always skip CPU whose number is above 63 which is the maximum size of that array and the maximum number of supported CPUs on these machines. Additionally, we make sure the boot_paca is properly initialized in our early startup code. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/paca.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index ebf9846f3c3b..f4adf89d7614 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -26,20 +26,6 @@ extern unsigned long __toc_start; #ifdef CONFIG_PPC_BOOK3S -/* - * We only have to have statically allocated lppaca structs on - * legacy iSeries, which supports at most 64 cpus. - */ -#ifdef CONFIG_PPC_ISERIES -#if NR_CPUS < 64 -#define NR_LPPACAS NR_CPUS -#else -#define NR_LPPACAS 64 -#endif -#else /* not iSeries */ -#define NR_LPPACAS 1 -#endif - /* * The structure which the hypervisor knows about - this structure * should not cross a page boundary. The vpa_init/register_vpa call -- cgit v1.2.2 From e11802872db82417e51e1bbe0751dbb21842d713 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 7 Mar 2011 14:00:20 +0000 Subject: powerpc: core irq_data conversion. Signed-off-by: Lennert Buytenhek Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 23 ++++++++++++++++------- arch/powerpc/kernel/machine_kexec.c | 21 ++++++++++++--------- 2 files changed, 28 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8a958ca26ac2..0a5570338b96 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -237,6 +237,7 @@ int show_interrupts(struct seq_file *p, void *v) int i = *(loff_t *) v, j, prec; struct irqaction *action; struct irq_desc *desc; + struct irq_chip *chip; if (i > nr_irqs) return 0; @@ -270,8 +271,9 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); - if (desc->chip) - seq_printf(p, " %-16s", desc->chip->name); + chip = get_irq_desc_chip(desc); + if (chip) + seq_printf(p, " %-16s", chip->name); else seq_printf(p, " %-16s", "None"); seq_printf(p, " %-8s", (desc->status & IRQ_LEVEL) ? "Level" : "Edge"); @@ -313,6 +315,8 @@ void fixup_irqs(const struct cpumask *map) alloc_cpumask_var(&mask, GFP_KERNEL); for_each_irq(irq) { + struct irq_chip *chip; + desc = irq_to_desc(irq); if (!desc) continue; @@ -320,13 +324,15 @@ void fixup_irqs(const struct cpumask *map) if (desc->status & IRQ_PER_CPU) continue; - cpumask_and(mask, desc->affinity, map); + chip = get_irq_desc_chip(desc); + + cpumask_and(mask, desc->irq_data.affinity, map); if (cpumask_any(mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); cpumask_copy(mask, map); } - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, mask); + if (chip->irq_set_affinity) + chip->irq_set_affinity(&desc->irq_data, mask, true); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } @@ -1145,11 +1151,14 @@ static int virq_debug_show(struct seq_file *m, void *private) raw_spin_lock_irqsave(&desc->lock, flags); if (desc->action && desc->action->handler) { + struct irq_chip *chip; + seq_printf(m, "%5d ", i); seq_printf(m, "0x%05lx ", virq_to_hw(i)); - if (desc->chip && desc->chip->name) - p = desc->chip->name; + chip = get_irq_desc_chip(desc); + if (chip && chip->name) + p = chip->name; else p = none; seq_printf(m, "%-15s ", p); diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 49a170af8145..976de37fe5b3 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -26,20 +26,23 @@ void machine_kexec_mask_interrupts(void) { for_each_irq(i) { struct irq_desc *desc = irq_to_desc(i); + struct irq_chip *chip; - if (!desc || !desc->chip) + if (!desc) continue; - if (desc->chip->eoi && - desc->status & IRQ_INPROGRESS) - desc->chip->eoi(i); + chip = get_irq_desc_chip(desc); + if (!chip) + continue; + + if (chip->irq_eoi && desc->status & IRQ_INPROGRESS) + chip->irq_eoi(&desc->irq_data); - if (desc->chip->mask) - desc->chip->mask(i); + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); - if (desc->chip->disable && - !(desc->status & IRQ_DISABLED)) - desc->chip->disable(i); + if (chip->irq_disable && !(desc->status & IRQ_DISABLED)) + chip->irq_disable(&desc->irq_data); } } -- cgit v1.2.2 From 0837e3242c73566fc1c0196b4ec61779c25ffc93 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 9 Mar 2011 14:38:42 +1100 Subject: perf, powerpc: Handle events that raise an exception without overflowing Events on POWER7 can roll back if a speculative event doesn't eventually complete. Unfortunately in some rare cases they will raise a performance monitor exception. We need to catch this to ensure we reset the PMC. In all cases the PMC will be 256 or less cycles from overflow. Signed-off-by: Anton Blanchard Signed-off-by: Peter Zijlstra Cc: # as far back as it applies cleanly LKML-Reference: <20110309143842.6c22845e@kryten> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index ab6f6beadb57..97e0ae414940 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1269,6 +1269,28 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) return ip; } +static bool pmc_overflow(unsigned long val) +{ + if ((int)val < 0) + return true; + + /* + * Events on POWER7 can roll back if a speculative event doesn't + * eventually complete. Unfortunately in some rare cases they will + * raise a performance monitor exception. We need to catch this to + * ensure we reset the PMC. In all cases the PMC will be 256 or less + * cycles from overflow. + * + * We only do this if the first pass fails to find any overflowing + * PMCs because a user might set a period of less than 256 and we + * don't want to mistakenly reset them. + */ + if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) + return true; + + return false; +} + /* * Performance monitor interrupt stuff */ @@ -1316,7 +1338,7 @@ static void perf_event_interrupt(struct pt_regs *regs) if (is_limited_pmc(i + 1)) continue; val = read_pmc(i + 1); - if ((int)val < 0) + if (pmc_overflow(val)) write_pmc(i + 1, 0); } } -- cgit v1.2.2 From 90407c9976939e061f32b0e07602e5ce4887d9e8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Mar 2011 10:57:57 +1100 Subject: powerpc/pci: Fix crash in PCI code on ppc64 when matching device nodes Commit b5d937de0367d26f65b9af1aef5f2c34c1939be0 has a bug which causes basically a NULL dereference in the PCI code during boot on ppc64 machines. fetch_dev_dn() is called when dev->dev.of_node is NULL, so using that as the starting point for the search makes no sense. It should instead start from the device node of the PHB. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci_dn.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 29852688ceaa..d225d99fe39d 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -176,11 +176,14 @@ static void *is_devfn_node(struct device_node *dn, void *data) */ struct device_node *fetch_dev_dn(struct pci_dev *dev) { - struct device_node *orig_dn = dev->dev.of_node; + struct pci_controller *phb = dev->sysdata; struct device_node *dn; unsigned long searchval = (dev->bus->number << 8) | dev->devfn; - dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval); + if (WARN_ON(!phb)) + return NULL; + + dn = traverse_pci_devices(phb->dn, is_devfn_node, (void *)searchval); if (dn) dev->dev.of_node = dn; return dn; -- cgit v1.2.2 From a71f5d5d279375205009a4be56a3cf6682921292 Mon Sep 17 00:00:00 2001 From: Mike Wolf Date: Mon, 21 Mar 2011 11:14:53 +1100 Subject: powerpc/ptrace: Remove BUG_ON when full register set not available In some cases during a threaded core dump not all the threads will have a full register set. This happens when the signal causing the core dump races with a thread exiting. The race happens when the exiting thread has entered the kernel for the last time before the signal arrives, but doesn't get far enough through the exit code to avoid being included in the core dump. So we get a thread included in the core dump which is never going to go out to userspace again and only has a partial register set recorded Normally we would catch each thread as it is about to go into userspace and capture the full register set then. However, this exiting thread is never going to go out to userspace again, so we have no way to capture its full register set. It doesn't really matter, though, as this is a thread which is effectively already dead. So instead of hitting a BUG() in this case (a really bad choice of action in the first place), we use a poison value for the register values. [BenH]: Some cosmetic/stylistic changes and fix build on ppc32 Signed-off-by: Mike Wolf Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ptrace.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 906536998291..895b082f1e48 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -229,12 +229,16 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - int ret; + int i, ret; if (target->thread.regs == NULL) return -EIO; - CHECK_FULL_REGS(target->thread.regs); + if (!FULL_REGS(target->thread.regs)) { + /* We have a partial register set. Fill 14-31 with bogus values */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, target->thread.regs, @@ -641,11 +645,16 @@ static int gpr32_get(struct task_struct *target, compat_ulong_t *k = kbuf; compat_ulong_t __user *u = ubuf; compat_ulong_t reg; + int i; if (target->thread.regs == NULL) return -EIO; - CHECK_FULL_REGS(target->thread.regs); + if (!FULL_REGS(target->thread.regs)) { + /* We have a partial register set. Fill 14-31 with bogus values */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } pos /= sizeof(reg); count /= sizeof(reg); -- cgit v1.2.2 From b6a84016bd2598e35ead635147fa53619982648d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Mar 2011 16:30:42 -0700 Subject: mm: NUMA aware alloc_thread_info_node() Add a node parameter to alloc_thread_info(), and change its name to alloc_thread_info_node() This change is needed to allow NUMA aware kthread_create_on_cpu() Signed-off-by: Eric Dumazet Acked-by: David S. Miller Reviewed-by: Andi Kleen Acked-by: Rusty Russell Cc: Tejun Heo Cc: Tony Luck Cc: Fenghua Yu Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8303a6c65ef7..f74f355a9617 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1218,11 +1218,11 @@ void __ppc64_runlatch_off(void) static struct kmem_cache *thread_info_cache; -struct thread_info *alloc_thread_info(struct task_struct *tsk) +struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node) { struct thread_info *ti; - ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL); + ti = kmem_cache_alloc_node(thread_info_cache, GFP_KERNEL, node); if (unlikely(ti == NULL)) return NULL; #ifdef CONFIG_DEBUG_STACK_USAGE -- cgit v1.2.2 From 31db58b3ab432f72ea76be58b12e6ffaf627d5db Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:15 -0400 Subject: mm: arch: make get_gate_vma take an mm_struct instead of a task_struct Morally, the presence of a gate vma is more an attribute of a particular mm than a particular task. Moreover, dropping the dependency on task_struct will help make both existing and future operations on mm's more flexible and convenient. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/powerpc/kernel/vdso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index fd8728729abc..6169f1756930 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -830,7 +830,7 @@ int in_gate_area(struct task_struct *task, unsigned long addr) return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } -- cgit v1.2.2 From 83b964bbf82eb13a8f31bb49ca420787fe01f7a6 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:16 -0400 Subject: mm: arch: make in_gate_area take an mm_struct instead of a task_struct Morally, the question of whether an address lies in a gate vma should be asked with respect to an mm, not a particular task. Moreover, dropping the dependency on task_struct will help make existing and future operations on mm's more flexible and convenient. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/powerpc/kernel/vdso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 6169f1756930..467aa9ecbf9d 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -825,7 +825,7 @@ int in_gate_area_no_task(unsigned long addr) return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } -- cgit v1.2.2 From cae5d39032acf26c265f6b1dc73d7ce6ff4bc387 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:17 -0400 Subject: mm: arch: rename in_gate_area_no_task to in_gate_area_no_mm Now that gate vma's are referenced with respect to a particular mm and not a particular task it only makes sense to propagate the change to this predicate as well. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/powerpc/kernel/vdso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 467aa9ecbf9d..142ab1008c3b 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -820,7 +820,7 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } -- cgit v1.2.2 From 388b78adc9899f0299128610f566051d0b1a57f6 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 23 Mar 2011 16:43:03 -0700 Subject: rapidio: modify configuration to support PCI-SRIO controller 1. Add an option to include RapidIO support if the PCI is available. 2. Add FSL_RIO configuration option to enable controller selection. 3. Add RapidIO support option into x86 and MIPS architectures. Signed-off-by: Alexandre Bounine Acked-by: Kumar Gala Cc: Matt Porter Cc: Li Yang Cc: Thomas Moll Cc: Micha Nelissen Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 5c518ad3445c..913611105c1f 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -64,7 +64,7 @@ _GLOBAL(__setup_cpu_e500v2) bl __e500_icache_setup bl __e500_dcache_setup bl __setup_e500_ivors -#ifdef CONFIG_RAPIDIO +#ifdef CONFIG_FSL_RIO /* Ensure that RFXE is set */ mfspr r3,SPRN_HID1 oris r3,r3,HID1_RFXE@h -- cgit v1.2.2 From 93a72052be81823fa1584b9be037d51924f9efa4 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Wed, 23 Mar 2011 16:43:29 -0700 Subject: crash_dump: export is_kdump_kernel to modules, consolidate elfcorehdr_addr, setup_elfcorehdr and saved_max_pfn The Xen PV drivers in a crashed HVM guest can not connect to the dom0 backend drivers because both frontend and backend drivers are still in connected state. To run the connection reset function only in case of a crashdump, the is_kdump_kernel() function needs to be available for the PV driver modules. Consolidate elfcorehdr_addr, setup_elfcorehdr and saved_max_pfn into kernel/crash_dump.c Also export elfcorehdr_addr to make is_kdump_kernel() usable for modules. Leave 'elfcorehdr' as early_param(). This changes powerpc from __setup() to early_param(). It adds an address range check from x86 also on ia64 and powerpc. [akpm@linux-foundation.org: additional #includes] [akpm@linux-foundation.org: remove elfcorehdr_addr export] [akpm@linux-foundation.org: fix for Tejun's mm/nobootmem.c changes] Signed-off-by: Olaf Hering Cc: Russell King Cc: "Luck, Tony" Cc: Benjamin Herrenschmidt Cc: Paul Mundt Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/crash_dump.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 0a2af50243cb..424afb6b8fba 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -28,9 +28,6 @@ #define DBG(fmt...) #endif -/* Stores the physical address of elf header of crash image. */ -unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; - #ifndef CONFIG_RELOCATABLE void __init reserve_kdump_trampoline(void) { @@ -72,20 +69,6 @@ void __init setup_kdump_trampoline(void) } #endif /* CONFIG_RELOCATABLE */ -/* - * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by - * is_kdump_kernel() to determine if we are booting after a panic. Hence - * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE. - */ -static int __init parse_elfcorehdr(char *p) -{ - if (p) - elfcorehdr_addr = memparse(p, &p); - - return 1; -} -__setup("elfcorehdr=", parse_elfcorehdr); - static int __init parse_savemaxmem(char *p) { if (p) -- cgit v1.2.2 From 0415b00d175e0d8945e6785aad21b5f157976ce0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Mar 2011 18:50:09 +0100 Subject: percpu: Always align percpu output section to PAGE_SIZE Percpu allocator honors alignment request upto PAGE_SIZE and both the percpu addresses in the percpu address space and the translated kernel addresses should be aligned accordingly. The calculation of the former depends on the alignment of percpu output section in the kernel image. The linker script macros PERCPU_VADDR() and PERCPU() are used to define this output section and the latter takes @align parameter. Several architectures are using @align smaller than PAGE_SIZE breaking percpu memory alignment. This patch removes @align parameter from PERCPU(), renames it to PERCPU_SECTION() and makes it always align to PAGE_SIZE. While at it, add PCPU_SETUP_BUG_ON() checks such that alignment problems are reliably detected and remove percpu alignment comment recently added in workqueue.c as the condition would trigger BUG way before reaching there. For um, this patch raises the alignment of percpu area. As the area is in .init, there shouldn't be any noticeable difference. This problem was discovered by David Howells while debugging boot failure on mn10300. Signed-off-by: Tejun Heo Acked-by: Mike Frysinger Cc: uclinux-dist-devel@blackfin.uclinux.org Cc: David Howells Cc: Jeff Dike Cc: user-mode-linux-devel@lists.sourceforge.net --- arch/powerpc/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b9150f07d266..920276c0f6a1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -160,7 +160,7 @@ SECTIONS INIT_RAM_FS } - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(8); .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { -- cgit v1.2.2 From 98488db9ff01849354bffb6a9675b1cc2ecf03fd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Mar 2011 15:43:57 +0100 Subject: powerpc: Use proper accessors for IRQ_* flags Use the proper accessors instead of open access to irq_desc. Converted with coccinelle. Signed-off-by: Thomas Gleixner --- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/machine_kexec.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0a5570338b96..8f7da4402e9b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -618,7 +618,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, smp_wmb(); /* Clear norequest flags */ - irq_to_desc(i)->status &= ~IRQ_NOREQUEST; + irq_clear_status_flags(i, IRQ_NOREQUEST); /* Legacy flags are left to default at this point, * one can then use irq_create_mapping() to diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index bd1e1ff17b2d..2d65e9eef31e 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -35,13 +35,13 @@ void machine_kexec_mask_interrupts(void) { if (!chip) continue; - if (chip->irq_eoi && desc->status & IRQ_INPROGRESS) + if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) chip->irq_eoi(&desc->irq_data); if (chip->irq_mask) chip->irq_mask(&desc->irq_data); - if (chip->irq_disable && !(desc->status & IRQ_DISABLED)) + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) chip->irq_disable(&desc->irq_data); } } -- cgit v1.2.2 From 7bfbc1f28311d680e45d7122ecd48dec57703750 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Mar 2011 16:36:35 +0100 Subject: powerpc: irq: Use irqdata based information We want to tighten the irq_desc access. So use the new accessors for the same information. Signed-off-by: Thomas Gleixner --- arch/powerpc/kernel/irq.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8f7da4402e9b..58d401265328 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -276,7 +276,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, " %-16s", chip->name); else seq_printf(p, " %-16s", "None"); - seq_printf(p, " %-8s", (desc->status & IRQ_LEVEL) ? "Level" : "Edge"); + seq_printf(p, " %-8s", (irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); if (action) { seq_printf(p, " %s", action->name); @@ -315,24 +315,26 @@ void fixup_irqs(const struct cpumask *map) alloc_cpumask_var(&mask, GFP_KERNEL); for_each_irq(irq) { + struct irq_data *data; struct irq_chip *chip; desc = irq_to_desc(irq); if (!desc) continue; - if (desc->status & IRQ_PER_CPU) + data = irq_desc_get_irq_data(desc); + if (irqd_is_per_cpu(data)) continue; - chip = get_irq_desc_chip(desc); + chip = irq_data_get_irq_chip(data); - cpumask_and(mask, desc->irq_data.affinity, map); + cpumask_and(mask, data->affinity, map); if (cpumask_any(mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); cpumask_copy(mask, map); } if (chip->irq_set_affinity) - chip->irq_set_affinity(&desc->irq_data, mask, true); + chip->irq_set_affinity(data, mask, true); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } @@ -827,7 +829,7 @@ unsigned int irq_create_of_mapping(struct device_node *controller, /* Set type if specified and different than the current one */ if (type != IRQ_TYPE_NONE && - type != (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK)) + type != (irqd_get_trigger_type(irq_get_irq_data(virq)))) set_irq_type(virq, type); return virq; } -- cgit v1.2.2 From ec775d0e70eb6b7116406b3441cb8501c2849dd2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Mar 2011 16:45:20 +0100 Subject: powerpc: Convert to new irq_* function names Scripted with coccinelle. Signed-off-by: Thomas Gleixner --- arch/powerpc/kernel/irq.c | 6 +++--- arch/powerpc/kernel/machine_kexec.c | 2 +- arch/powerpc/kernel/pci-common.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 58d401265328..a155dfde9ac3 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -830,7 +830,7 @@ unsigned int irq_create_of_mapping(struct device_node *controller, /* Set type if specified and different than the current one */ if (type != IRQ_TYPE_NONE && type != (irqd_get_trigger_type(irq_get_irq_data(virq)))) - set_irq_type(virq, type); + irq_set_irq_type(virq, type); return virq; } EXPORT_SYMBOL_GPL(irq_create_of_mapping); @@ -853,7 +853,7 @@ void irq_dispose_mapping(unsigned int virq) return; /* remove chip and handler */ - set_irq_chip_and_handler(virq, NULL, NULL); + irq_set_chip_and_handler(virq, NULL, NULL); /* Make sure it's completed */ synchronize_irq(virq); @@ -1158,7 +1158,7 @@ static int virq_debug_show(struct seq_file *m, void *private) seq_printf(m, "%5d ", i); seq_printf(m, "0x%05lx ", virq_to_hw(i)); - chip = get_irq_desc_chip(desc); + chip = irq_desc_get_chip(desc); if (chip && chip->name) p = chip->name; else diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 2d65e9eef31e..7ee50f0547cb 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -31,7 +31,7 @@ void machine_kexec_mask_interrupts(void) { if (!desc) continue; - chip = get_irq_desc_chip(desc); + chip = irq_desc_get_chip(desc); if (!chip) continue; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 3cd85faa8ac6..893af2a9cd03 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -261,7 +261,7 @@ int pci_read_irq_line(struct pci_dev *pci_dev) virq = irq_create_mapping(NULL, line); if (virq != NO_IRQ) - set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); + irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); } else { pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n", oirq.size, oirq.specifier[0], oirq.specifier[1], -- cgit v1.2.2 From 433c9c67c53cc42b2824532d3abb7ac62970d3fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Mar 2011 17:04:59 +0100 Subject: powerpc: Use generic show_interrupts() Signed-off-by: Thomas Gleixner --- arch/powerpc/kernel/irq.c | 61 +---------------------------------------------- 1 file changed, 1 insertion(+), 60 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index a155dfde9ac3..63625e0650b5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -195,7 +195,7 @@ notrace void arch_local_irq_restore(unsigned long en) EXPORT_SYMBOL(arch_local_irq_restore); #endif /* CONFIG_PPC64 */ -static int show_other_interrupts(struct seq_file *p, int prec) +int arch_show_interrupts(struct seq_file *p, int prec) { int j; @@ -231,65 +231,6 @@ static int show_other_interrupts(struct seq_file *p, int prec) return 0; } -int show_interrupts(struct seq_file *p, void *v) -{ - unsigned long flags, any_count = 0; - int i = *(loff_t *) v, j, prec; - struct irqaction *action; - struct irq_desc *desc; - struct irq_chip *chip; - - if (i > nr_irqs) - return 0; - - for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) - j *= 10; - - if (i == nr_irqs) - return show_other_interrupts(p, prec); - - /* print header */ - if (i == 0) { - seq_printf(p, "%*s", prec + 8, ""); - for_each_online_cpu(j) - seq_printf(p, "CPU%-8d", j); - seq_putc(p, '\n'); - } - - desc = irq_to_desc(i); - if (!desc) - return 0; - - raw_spin_lock_irqsave(&desc->lock, flags); - for_each_online_cpu(j) - any_count |= kstat_irqs_cpu(i, j); - action = desc->action; - if (!action && !any_count) - goto out; - - seq_printf(p, "%*d: ", prec, i); - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); - - chip = get_irq_desc_chip(desc); - if (chip) - seq_printf(p, " %-16s", chip->name); - else - seq_printf(p, " %-16s", "None"); - seq_printf(p, " %-8s", (irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); - - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } - - seq_putc(p, '\n'); -out: - raw_spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} - /* * /proc/stat helpers */ -- cgit v1.2.2 From 6090912c4abcfc6c81b156cf2bb4cda23ae6e847 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 24 Mar 2011 20:50:06 +0000 Subject: powerpc: Implement dma_mmap_coherent() This is used by Alsa to mmap buffers allocated with dma_alloc_coherent() into userspace. We need a special variant to handle machines with non-coherent DMAs as those buffers have "special" virt addresses and require non-cachable mappings Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index cf02cad62d9a..d238c082c3c5 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -179,3 +179,21 @@ static int __init dma_init(void) return 0; } fs_initcall(dma_init); + +int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t handle, size_t size) +{ + unsigned long pfn; + +#ifdef CONFIG_NOT_COHERENT_CACHE + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr); +#else + pfn = page_to_pfn(virt_to_page(cpu_addr)); +#endif + return remap_pfn_range(vma, vma->vm_start, + pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} +EXPORT_SYMBOL_GPL(dma_mmap_coherent); -- cgit v1.2.2 From ad5d1c888e556bc00c4e86f452cad4a3a87d22c1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 20 Mar 2011 15:28:03 +0000 Subject: powerpc: Fix accounting of softirq time when idle commit cf9efce0ce31 (powerpc: Account time using timebase rather than PURR) used in_irq() to detect if the time was spent in interrupt processing. This only catches hardirq context so if we are in softirq context and in the idle loop we end up accounting it as idle time. If we instead use in_interrupt() we catch both softirq and hardirq time. The issue was found when running a network intensive workload. top showed the following: 0.0%us, 1.1%sy, 0.0%ni, 85.7%id, 0.0%wa, 9.9%hi, 3.3%si, 0.0%st 85.7% idle. But this was wildly different to the perf events data. To confirm the suspicion I ran something to keep the core busy: # yes > /dev/null & 8.2%us, 0.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 10.3%hi, 81.4%si, 0.0%st We only got 8.2% of the CPU for the userspace task and softirq has shot up to 81.4%. With the patch below top shows the correct stats: 0.0%us, 0.0%sy, 0.0%ni, 5.3%id, 0.0%wa, 13.3%hi, 81.3%si, 0.0%st Signed-off-by: Anton Blanchard Cc: stable@kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 09d31dbf43f9..aa9269600ca2 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -356,7 +356,7 @@ void account_system_vtime(struct task_struct *tsk) } get_paca()->user_time_scaled += user_scaled; - if (in_irq() || idle_task(smp_processor_id()) != tsk) { + if (in_interrupt() || idle_task(smp_processor_id()) != tsk) { account_system_time(tsk, 0, delta, sys_scaled); if (stolen) account_steal_time(stolen); -- cgit v1.2.2 From 84493804bbbc10291492661440728668b2b06abe Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 6 Mar 2011 18:09:07 +0000 Subject: powerpc/mm: Move the STAB0 location to 0x8000 to make room in low memory Recent upstream builds with allmodconfig fail due to lack of space between 0x3000 and 0x6000. We have a hard block at 0x7000 but we can spare a page by moving the STAB0 from 0x6000 to 0x8000. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 8a817995b4cd..c532cb2c927a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -977,20 +977,6 @@ _GLOBAL(do_stab_bolted) rfid b . /* prevent speculative execution */ -/* - * Space for CPU0's segment table. - * - * On iSeries, the hypervisor must fill in at least one entry before - * we get control (with relocate on). The address is given to the hv - * as a page number (see xLparMap below), so this must be at a - * fixed address (the linker can't compute (u64)&initial_stab >> - * PAGE_SHIFT). - */ - . = STAB0_OFFSET /* 0x6000 */ - .globl initial_stab -initial_stab: - .space 4096 - #ifdef CONFIG_PPC_PSERIES /* * Data area reserved for FWNMI option. @@ -1027,3 +1013,17 @@ xLparMap: #ifdef CONFIG_PPC_PSERIES . = 0x8000 #endif /* CONFIG_PPC_PSERIES */ + +/* + * Space for CPU0's segment table. + * + * On iSeries, the hypervisor must fill in at least one entry before + * we get control (with relocate on). The address is given to the hv + * as a page number (see xLparMap above), so this must be at a + * fixed address (the linker can't compute (u64)&initial_stab >> + * PAGE_SHIFT). + */ + . = STAB0_OFFSET /* 0x8000 */ + .globl initial_stab +initial_stab: + .space 4096 -- cgit v1.2.2 From 25985edcedea6396277003854657b5f3cb31a628 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 30 Mar 2011 22:57:33 -0300 Subject: Fix common misspellings Fixes generated by 'codespell' and manually reviewed. Signed-off-by: Lucas De Marchi --- arch/powerpc/kernel/btext.c | 2 +- arch/powerpc/kernel/exceptions-64e.S | 2 +- arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kernel/head_40x.S | 2 +- arch/powerpc/kernel/head_44x.S | 2 +- arch/powerpc/kernel/head_64.S | 2 +- arch/powerpc/kernel/head_fsl_booke.S | 2 +- arch/powerpc/kernel/l2cr_6xx.S | 2 +- arch/powerpc/kernel/lparcfg.c | 2 +- arch/powerpc/kernel/perf_event.c | 2 +- arch/powerpc/kernel/ppc_save_regs.S | 2 +- arch/powerpc/kernel/prom.c | 4 ++-- arch/powerpc/kernel/ptrace.c | 2 +- arch/powerpc/kernel/rtasd.c | 2 +- arch/powerpc/kernel/swsusp_32.S | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/kernel/udbg_16550.c | 2 +- arch/powerpc/kernel/vdso32/sigtramp.S | 2 +- arch/powerpc/kernel/vdso64/sigtramp.S | 2 +- 19 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 625942ae5585..60b3e377b1e4 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -99,7 +99,7 @@ void __init btext_prepare_BAT(void) /* This function can be used to enable the early boot text when doing * OF booting or within bootx init. It must be followed by a btext_unmap() - * call before the logical address becomes unuseable + * call before the logical address becomes unusable */ void __init btext_setup_display(int width, int height, int depth, int pitch, unsigned long address) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 5c43063d2506..9651acc3504a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -379,7 +379,7 @@ interrupt_end_book3e: mfspr r13,SPRN_SPRG_PACA /* get our PACA */ b system_call_common -/* Auxillary Processor Unavailable Interrupt */ +/* Auxiliary Processor Unavailable Interrupt */ START_EXCEPTION(ap_unavailable); NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_KEEP) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index c532cb2c927a..aeb739e18769 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -5,7 +5,7 @@ * handling and other fixed offset specific things. * * This file is meant to be #included from head_64.S due to - * position dependant assembly. + * position dependent assembly. * * Most of this originates from head_64.S and thus has the same * copyright history. diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 9dd21a8c4d52..a91626d87fc9 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -766,7 +766,7 @@ DataAccess: * miss get to this point to load the TLB. * r10 - TLB_TAG value * r11 - Linux PTE - * r12, r9 - avilable to use + * r12, r9 - available to use * PID - loaded with proper value when we get here * Upon exit, we reload everything and RFI. * Actually, it will fit now, but oh well.....a common place diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index cbb3436b592d..5e12b741ba5f 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -178,7 +178,7 @@ interrupt_base: NORMAL_EXCEPTION_PROLOG EXC_XFER_EE_LITE(0x0c00, DoSyscall) - /* Auxillary Processor Unavailable Interrupt */ + /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 782f23df7c85..285e6f775bdf 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -40,7 +40,7 @@ #include #include -/* The physical memory is layed out such that the secondary processor +/* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow * using the layout described in exceptions-64s.S */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 3e02710d9562..5ecf54cfa7d4 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -326,7 +326,7 @@ interrupt_base: NORMAL_EXCEPTION_PROLOG EXC_XFER_EE_LITE(0x0c00, DoSyscall) - /* Auxillary Processor Unavailable Interrupt */ + /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 2a2f3c3f6d80..97ec8557f974 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -151,7 +151,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /**** Might be a good idea to set L2DO here - to prevent instructions from getting into the cache. But since we invalidate the next time we enable the cache it doesn't really matter. - Don't do this unless you accomodate all processor variations. + Don't do this unless you accommodate all processor variations. The bit moved on the 7450..... ****/ diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 16468362ad57..301db65f05a1 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -262,7 +262,7 @@ static void parse_ppp_data(struct seq_file *m) seq_printf(m, "system_active_processors=%d\n", ppp_data.active_system_procs); - /* pool related entries are apropriate for shared configs */ + /* pool related entries are appropriate for shared configs */ if (lppaca_of(0).shared_proc) { unsigned long pool_idle_time, pool_procs; diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 97e0ae414940..c4063b7f49a0 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -759,7 +759,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) /* * If group events scheduling transaction was started, - * skip the schedulability test here, it will be peformed + * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ if (cpuhw->group_flag & PERF_EVENT_TXN) diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index e83ba3f078e4..1b1787d52896 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -15,7 +15,7 @@ /* * Grab the register values as they are now. - * This won't do a particularily good job because we really + * This won't do a particularly good job because we really * want our caller's caller's registers, and our caller has * already executed its prologue. * ToDo: We could reach back into the caller's save area to do diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 05b7139d6a27..e74fa12afc82 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -683,7 +683,7 @@ void __init early_init_devtree(void *params) #endif #ifdef CONFIG_PHYP_DUMP - /* scan tree to see if dump occured during last boot */ + /* scan tree to see if dump occurred during last boot */ of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); #endif @@ -739,7 +739,7 @@ void __init early_init_devtree(void *params) DBG("Scanning CPUs ...\n"); - /* Retreive CPU related informations from the flat tree + /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ of_scan_flat_dt(early_init_dt_scan_cpus, NULL); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 895b082f1e48..55613e33e263 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -463,7 +463,7 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, #ifdef CONFIG_VSX /* * Currently to set and and get all the vsx state, you need to call - * the fp and VMX calls aswell. This only get/sets the lower 32 + * the fp and VMX calls as well. This only get/sets the lower 32 * 128bit VSX registers. */ diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 7980ec0e1e1a..67f6c3b51357 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -465,7 +465,7 @@ static void start_event_scan(void) pr_debug("rtasd: will sleep for %d milliseconds\n", (30000 / rtas_event_scan_rate)); - /* Retreive errors from nvram if any */ + /* Retrieve errors from nvram if any */ retreive_nvram_error_log(); schedule_delayed_work_on(cpumask_first(cpu_online_mask), diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index b0754e237438..ba4dee3d233f 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -143,7 +143,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* Disable MSR:DR to make sure we don't take a TLB or * hash miss during the copy, as our hash table will - * for a while be unuseable. For .text, we assume we are + * for a while be unusable. For .text, we assume we are * covered by a BAT. This works only for non-G5 at this * point. G5 will need a better approach, possibly using * a small temporary hash table filled with large mappings, diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index bd74fac169be..5ddb801bc154 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -959,7 +959,7 @@ void __kprobes program_check_exception(struct pt_regs *regs) * ESR_DST (!?) or 0. In the process of chasing this with the * hardware people - not sure if it can happen on any illegal * instruction or only on FP instructions, whether there is a - * pattern to occurences etc. -dgibson 31/Mar/2003 */ + * pattern to occurrences etc. -dgibson 31/Mar/2003 */ switch (do_mathemu(regs)) { case 0: emulate_single_step(regs); diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index b4b167b33643..baa33a7517bc 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -1,5 +1,5 @@ /* - * udbg for NS16550 compatable serial ports + * udbg for NS16550 compatible serial ports * * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp * diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp.S index 68d49dd71dcc..cf0c9c9c24f9 100644 --- a/arch/powerpc/kernel/vdso32/sigtramp.S +++ b/arch/powerpc/kernel/vdso32/sigtramp.S @@ -19,7 +19,7 @@ /* The nop here is a hack. The dwarf2 unwind routines subtract 1 from the return address to get an address in the middle of the presumed - call instruction. Since we don't have a call here, we artifically + call instruction. Since we don't have a call here, we artificially extend the range covered by the unwind info by adding a nop before the real start. */ nop diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S index 59eb59bb4082..45ea281e9a21 100644 --- a/arch/powerpc/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -20,7 +20,7 @@ /* The nop here is a hack. The dwarf2 unwind routines subtract 1 from the return address to get an address in the middle of the presumed - call instruction. Since we don't have a call here, we artifically + call instruction. Since we don't have a call here, we artificially extend the range covered by the unwind info by padding before the real start. */ nop -- cgit v1.2.2 From 963e5d3b76d657f1ebcf3561446d2ba1872bbfa2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 29 Mar 2011 14:51:10 +1100 Subject: powerpc: Make decrementer interrupt robust against offlined CPUs With some implementations, it is possible that a timer interrupt occurs every few seconds on an offline CPU. In this case, just re-arm the decrementer and return immediately Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index aa9269600ca2..375480c56eb9 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -577,14 +577,21 @@ void timer_interrupt(struct pt_regs * regs) struct clock_event_device *evt = &decrementer->event; u64 now; + /* Ensure a positive value is written to the decrementer, or else + * some CPUs will continue to take decrementer exceptions. + */ + set_dec(DECREMENTER_MAX); + + /* Some implementations of hotplug will get timer interrupts while + * offline, just ignore these + */ + if (!cpu_online(smp_processor_id())) + return; + trace_timer_interrupt_entry(regs); __get_cpu_var(irq_stat).timer_irqs++; - /* Ensure a positive value is written to the decrementer, or else - * some CPUs will continuue to take decrementer exceptions */ - set_dec(DECREMENTER_MAX); - #if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); -- cgit v1.2.2 From fa3f82c8bb7acbe049ea71f258b3ae0a33d9d40b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 10 Feb 2011 18:45:24 +1100 Subject: powerpc/smp: soft-replugged CPUs must go back to start_secondary Various thing are torn down when a CPU is hot-unplugged. That CPU is expected to go back to start_secondary when re-plugged to re initialize everything, such as clock sources, maps, ... Some implementations just return from cpu_die() callback in the idle loop when the CPU is "re-plugged". This is not enough. We fix it using a little asm trampoline which resets the stack and calls back into start_secondary as if we were all fresh from boot. The trampoline already existed on ppc64, but we add it for ppc32 Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_32.S | 9 +++++++++ arch/powerpc/kernel/smp.c | 9 +++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 98c4b29a56f4..c5c24beb8387 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -890,6 +890,15 @@ __secondary_start: mtspr SPRN_SRR1,r4 SYNC RFI + +_GLOBAL(start_secondary_resume) + /* Reset stack */ + rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl start_secondary + b . #endif /* CONFIG_SMP */ #ifdef CONFIG_KVM_BOOK3S_HANDLER diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 981360509172..1c9956c43801 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -502,7 +502,7 @@ static struct device_node *cpu_to_l2cache(int cpu) } /* Activate a secondary processor. */ -int __devinit start_secondary(void *unused) +void __devinit start_secondary(void *unused) { unsigned int cpu = smp_processor_id(); struct device_node *l2_cache; @@ -558,7 +558,8 @@ int __devinit start_secondary(void *unused) local_irq_enable(); cpu_idle(); - return 0; + + BUG(); } int setup_profiling_timer(unsigned int multiplier) @@ -660,5 +661,9 @@ void cpu_die(void) { if (ppc_md.cpu_die) ppc_md.cpu_die(); + + /* If we return, we re-enter start_secondary */ + start_secondary_resume(); } + #endif -- cgit v1.2.2 From 4fcb8833af3355065bd8bffcd338eabc6f3a38a0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 10 Feb 2011 18:46:50 +1100 Subject: powerpc/smp: Fix generic_mach_cpu_die() This is used by some "soft" hotplug implementations. I needs to call idle_task_exit() when the CPU is going away, and we remove the now no-longer needed set_cpu_online() and local_irq_enable() which are handled by the return to start_secondary Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 1c9956c43801..3c0fab5e1e16 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -362,14 +362,13 @@ void generic_mach_cpu_die(void) unsigned int cpu; local_irq_disable(); + idle_task_exit(); cpu = smp_processor_id(); printk(KERN_DEBUG "CPU%d offline\n", cpu); __get_cpu_var(cpu_state) = CPU_DEAD; smp_wmb(); while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) cpu_relax(); - set_cpu_online(cpu, true); - local_irq_enable(); } #endif -- cgit v1.2.2 From b527d07114fdab83f39040c69b4b0a4b1b232c16 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 11 Feb 2011 12:46:41 +1100 Subject: powerpc/smp: Remove unused generic_cpu_enable() Nobody uses it, besides we should always use the normal __cpu_up path anyways Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3c0fab5e1e16..19d0c2576282 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -322,28 +322,6 @@ int generic_cpu_disable(void) return 0; } -int generic_cpu_enable(unsigned int cpu) -{ - /* Do the normal bootup if we haven't - * already bootstrapped. */ - if (system_state != SYSTEM_RUNNING) - return -ENOSYS; - - /* get the target out of it's holding state */ - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - smp_wmb(); - - while (!cpu_online(cpu)) - cpu_relax(); - -#ifdef CONFIG_PPC64 - fixup_irqs(cpu_online_mask); - /* counter the irq disable in fixup_irqs */ - local_irq_enable(); -#endif - return 0; -} - void generic_cpu_die(unsigned int cpu) { int i; -- cgit v1.2.2 From 7a53a4fe707a93a33f6c5d42173bf213cb6ff71d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 11 Feb 2011 12:49:01 +1100 Subject: powerpc/smp: Remove unused smp_ops->cpu_enable() Remove the last remnants of cpu_enable(), everybody uses the normal __cpu_up() path now Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 19d0c2576282..be7d7282341c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -350,21 +350,11 @@ void generic_mach_cpu_die(void) } #endif -static int __devinit cpu_enable(unsigned int cpu) -{ - if (smp_ops && smp_ops->cpu_enable) - return smp_ops->cpu_enable(cpu); - - return -ENOSYS; -} - int __cpuinit __cpu_up(unsigned int cpu) { int c; secondary_ti = current_set[cpu]; - if (!cpu_enable(cpu)) - return 0; if (smp_ops == NULL || (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))) -- cgit v1.2.2 From 1c91cc570576dfd0f288d664c095d64d11aaace4 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 11 Feb 2011 13:05:17 +1100 Subject: powerpc/pmac/smp: Rename fixup_irqs() to migrate_irqs() and use it on ppc32 Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 3 ++- arch/powerpc/kernel/smp.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 63625e0650b5..f621b7d2d869 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -246,12 +246,13 @@ u64 arch_irq_stat_cpu(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(const struct cpumask *map) +void migrate_irqs(void) { struct irq_desc *desc; unsigned int irq; static int warned; cpumask_var_t mask; + const struct cpumask *map = cpu_online_mask; alloc_cpumask_var(&mask, GFP_KERNEL); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index be7d7282341c..f6cc5c19c6ac 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -317,8 +317,8 @@ int generic_cpu_disable(void) set_cpu_online(cpu, false); #ifdef CONFIG_PPC64 vdso_data->processorCount--; - fixup_irqs(cpu_online_mask); #endif + migrate_irqs(); return 0; } -- cgit v1.2.2 From 62cc67b9df273be18fcb09a071592dedf751c90a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Feb 2011 16:49:58 +1100 Subject: powerpc/pmac/smp: Properly NAP offlined CPU on G5 The current code soft-disables, and then goes to NAP mode which turns interrupts on. That means that if an interrupt occurs, we will hit the masked interrupt code path which isn't what we want, as it will return with EE off, which will either get us out of NAP mode, or fail to enter it (according to spec). Instead, let's just rely on the fact that it is safe to take decrementer interrupts on an offline CPU and leave interrupts enabled. We can also get rid of the special case in asm for power4_cpu_offline_powersave() and just use power4_idle(). Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 7 +++++++ arch/powerpc/kernel/idle_power4.S | 21 --------------------- 2 files changed, 7 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 782f23df7c85..271140b38b6f 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -536,6 +536,13 @@ _GLOBAL(pmac_secondary_start) add r13,r13,r4 /* for this processor. */ mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/ + /* Mark interrupts soft and hard disabled (they might be enabled + * in the PACA when doing hotplug) + */ + li r0,0 + stb r0,PACASOFTIRQEN(r13) + stb r0,PACAHARDIRQEN(r13) + /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) subi r1,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index 5328709eeedc..ba3195478600 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -53,24 +53,3 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) isync b 1b -_GLOBAL(power4_cpu_offline_powersave) - /* Go to NAP now */ - mfmsr r7 - rldicl r0,r7,48,1 - rotldi r0,r0,16 - mtmsrd r0,1 /* hard-disable interrupts */ - li r0,1 - li r6,0 - stb r0,PACAHARDIRQEN(r13) /* we'll hard-enable shortly */ - stb r6,PACASOFTIRQEN(r13) /* soft-disable irqs */ -BEGIN_FTR_SECTION - DSSALL - sync -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - ori r7,r7,MSR_EE - oris r7,r7,MSR_POW@h - sync - isync - mtmsrd r7 - isync - blr -- cgit v1.2.2 From d72944457bb7d5c4be43aa1b741cb93c69484c20 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 8 Mar 2011 13:50:37 +1100 Subject: powerpc/smp: Add a smp_ops->bringup_up() done callback This allows us to stop abusing smp_ops->setup_cpu() for cleanup tasks that have to take place after the initial boot time CPU bringup. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f6cc5c19c6ac..df3739713edd 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -553,7 +553,11 @@ void __init smp_cpus_done(unsigned int max_cpus) free_cpumask_var(old_mask); + if (smp_ops && smp_ops->bringup_done) + smp_ops->bringup_done(); + dump_numa_cpu_topology(); + } int arch_sd_sibling_asym_packing(void) -- cgit v1.2.2 From 105765f451d3ff007bb4ae3761e825686d9615db Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 1 Apr 2011 09:23:37 +1100 Subject: powerpc/smp: Don't expose per-cpu "cpu_state" array Instead, keep it static, expose an accessor and use that from the PowerMac code. Avoids easy namespace collisions and will make it easier to consolidate with other implementations. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index df3739713edd..d7f8cc18ae05 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -305,7 +305,7 @@ void __devinit smp_prepare_boot_cpu(void) #ifdef CONFIG_HOTPLUG_CPU /* State of each CPU during hotplug phases */ -DEFINE_PER_CPU(int, cpu_state) = { 0 }; +static DEFINE_PER_CPU(int, cpu_state) = { 0 }; int generic_cpu_disable(void) { @@ -348,6 +348,11 @@ void generic_mach_cpu_die(void) while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) cpu_relax(); } + +void generic_set_cpu_dead(unsigned int cpu) +{ + per_cpu(cpu_state, cpu) = CPU_DEAD; +} #endif int __cpuinit __cpu_up(unsigned int cpu) -- cgit v1.2.2 From c56e58537d504706954a06570b4034c04e5b7500 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 8 Mar 2011 14:40:04 +1100 Subject: powerpc/smp: Create idle threads on demand and properly reset them Instead of creating idle threads at boot for all possible CPUs, we create them on demand, like x86 or ARM, and we properly call init_idle to re-initialize an idle thread when a CPU was unplugged and is now re-plugged. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 100 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index d7f8cc18ae05..54faff91b805 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -57,6 +57,25 @@ #define DBG(fmt...) #endif + +/* Store all idle threads, this can be reused instead of creating +* a new thread. Also avoids complicated thread destroy functionality +* for idle threads. +*/ +#ifdef CONFIG_HOTPLUG_CPU +/* + * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is + * removed after init for !CONFIG_HOTPLUG_CPU. + */ +static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); +#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) +#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) +#else +static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; +#define get_idle_for_cpu(x) (idle_thread_array[(x)]) +#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) +#endif + struct thread_info *secondary_ti; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); @@ -238,23 +257,6 @@ static void __devinit smp_store_cpu_info(int id) per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR); } -static void __init smp_create_idle(unsigned int cpu) -{ - struct task_struct *p; - - /* create a process for the processor */ - p = fork_idle(cpu); - if (IS_ERR(p)) - panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p)); -#ifdef CONFIG_PPC64 - paca[cpu].__current = p; - paca[cpu].kstack = (unsigned long) task_thread_info(p) - + THREAD_SIZE - STACK_FRAME_OVERHEAD; -#endif - current_set[cpu] = task_thread_info(p); - task_thread_info(p)->cpu = cpu; -} - void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned int cpu; @@ -288,10 +290,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) max_cpus = NR_CPUS; else max_cpus = 1; - - for_each_possible_cpu(cpu) - if (cpu != boot_cpuid) - smp_create_idle(cpu); } void __devinit smp_prepare_boot_cpu(void) @@ -355,9 +353,62 @@ void generic_set_cpu_dead(unsigned int cpu) } #endif +struct create_idle { + struct work_struct work; + struct task_struct *idle; + struct completion done; + int cpu; +}; + +static void __cpuinit do_fork_idle(struct work_struct *work) +{ + struct create_idle *c_idle = + container_of(work, struct create_idle, work); + + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); +} + +static int __cpuinit create_idle(unsigned int cpu) +{ + struct thread_info *ti; + struct create_idle c_idle = { + .cpu = cpu, + .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), + }; + INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle); + + c_idle.idle = get_idle_for_cpu(cpu); + + /* We can't use kernel_thread since we must avoid to + * reschedule the child. We use a workqueue because + * we want to fork from a kernel thread, not whatever + * userspace process happens to be trying to online us. + */ + if (!c_idle.idle) { + schedule_work(&c_idle.work); + wait_for_completion(&c_idle.done); + } else + init_idle(c_idle.idle, cpu); + if (IS_ERR(c_idle.idle)) { + pr_err("Failed fork for CPU %u: %li", cpu, PTR_ERR(c_idle.idle)); + return PTR_ERR(c_idle.idle); + } + ti = task_thread_info(c_idle.idle); + +#ifdef CONFIG_PPC64 + paca[cpu].__current = c_idle.idle; + paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD; +#endif + ti->cpu = cpu; + current_set[cpu] = ti; + + return 0; +} + int __cpuinit __cpu_up(unsigned int cpu) { - int c; + int rc, c; secondary_ti = current_set[cpu]; @@ -365,6 +416,11 @@ int __cpuinit __cpu_up(unsigned int cpu) (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))) return -EINVAL; + /* Make sure we have an idle thread */ + rc = create_idle(cpu); + if (rc) + return rc; + /* Make sure callin-map entry is 0 (can be leftover a CPU * hotplug */ -- cgit v1.2.2 From aeeafbfa7a5692c68d306043878aa2dd785e5230 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 8 Mar 2011 14:49:33 +1100 Subject: powerpc/smp: Increase vdso_data->processorCount, not just decrease it Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 54faff91b805..cbdbb14be4b0 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -551,6 +551,10 @@ void __devinit start_secondary(void *unused) secondary_cpu_time_init(); +#ifdef CONFIG_PPC64 + if (system_state == SYSTEM_RUNNING) + vdso_data->processorCount++; +#endif ipi_call_lock(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); -- cgit v1.2.2 From b987812b3fcaf70fdf0037589e5d2f5f2453e6ce Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 31 Mar 2011 07:27:20 +0000 Subject: powerpc/kexec: Fix mismatched ifdefs for PPC64/SMP. Commit b3df895aebe091b1657 "powerpc/kexec: Add support for FSL-BookE" introduced the original PPC_STD_MMU_64 checks around the function crash_kexec_wait_realmode(). Then commit c2be05481f61252 "powerpc: Fix default_machine_crash_shutdown #ifdef botch" changed the ifdef around the calling site to add a check on SMP, but the ifdef around the function itself was left unchanged, leaving an unused function for PPC_STD_MMU_64=y and SMP=n Rather than have two ifdefs that can get out of sync like this, simply put the corrected conditional around the function and use a stub to get rid of one set of ifdefs completely. Signed-off-by: Paul Gortmaker Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 3d569e2aff18..3d3d416339dd 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -163,7 +163,7 @@ static void crash_kexec_prepare_cpus(int cpu) } /* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#ifdef CONFIG_PPC_STD_MMU_64 +#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP) static void crash_kexec_wait_realmode(int cpu) { unsigned int msecs; @@ -188,6 +188,8 @@ static void crash_kexec_wait_realmode(int cpu) } mb(); } +#else +static inline void crash_kexec_wait_realmode(int cpu) {} #endif /* @@ -344,9 +346,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crash_save_cpu(regs, crashing_cpu); crash_kexec_prepare_cpus(crashing_cpu); cpu_set(crashing_cpu, cpus_in_crash); -#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP) crash_kexec_wait_realmode(crashing_cpu); -#endif machine_kexec_mask_interrupts(); -- cgit v1.2.2 From c1854e00727f50f7ac99e98d26ece04c087ef785 Mon Sep 17 00:00:00 2001 From: Ryan Grimm Date: Thu, 31 Mar 2011 19:33:02 +0000 Subject: powerpc: Set nr_cpu_ids early and use it to free PACAs Without this, "holes" in the CPU numbering can cause us to free too many PACAs Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/kernel/setup-common.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index f4adf89d7614..10f0aadee95b 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -203,7 +203,7 @@ void __init free_unused_pacas(void) { int new_size; - new_size = PAGE_ALIGN(sizeof(struct paca_struct) * num_possible_cpus()); + new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); if (new_size >= paca_size) return; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d4882a46647..21f30cb68077 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -509,6 +509,9 @@ void __init smp_setup_cpu_maps(void) */ cpu_init_thread_core_maps(nthreads); + /* Now that possible cpus are set, set nr_cpu_ids for later use */ + nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; + free_unused_pacas(); } #endif /* CONFIG_SMP */ -- cgit v1.2.2 From 1f112cee07b314e244ee9e71d9c1e6950dc13327 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 11 Apr 2011 22:54:42 +0200 Subject: PM / Hibernate: Introduce CONFIG_HIBERNATE_CALLBACKS Xen save/restore is going to use hibernate device callbacks for quiescing devices and putting them back to normal operations and it would need to select CONFIG_HIBERNATION for this purpose. However, that also would cause the hibernate interfaces for user space to be enabled, which might confuse user space, because the Xen kernels don't support hibernation. Moreover, it would be wasteful, as it would make the Xen kernels include a substantial amount of code that they would never use. To address this issue introduce new power management Kconfig option CONFIG_HIBERNATE_CALLBACKS, such that it will only select the code that is necessary for the hibernate device callbacks to work and make CONFIG_HIBERNATION select it. Then, Xen save/restore will be able to select CONFIG_HIBERNATE_CALLBACKS without dragging the entire hibernate code along with it. Signed-off-by: Rafael J. Wysocki Tested-by: Shriram Rajagopalan --- arch/powerpc/kernel/ibmebus.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index c00d4ca1ee15..28581f1ad2c0 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -527,7 +527,7 @@ static int ibmebus_bus_pm_resume_noirq(struct device *dev) #endif /* !CONFIG_SUSPEND */ -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_HIBERNATE_CALLBACKS static int ibmebus_bus_pm_freeze(struct device *dev) { @@ -665,7 +665,7 @@ static int ibmebus_bus_pm_restore_noirq(struct device *dev) return ret; } -#else /* !CONFIG_HIBERNATION */ +#else /* !CONFIG_HIBERNATE_CALLBACKS */ #define ibmebus_bus_pm_freeze NULL #define ibmebus_bus_pm_thaw NULL @@ -676,7 +676,7 @@ static int ibmebus_bus_pm_restore_noirq(struct device *dev) #define ibmebus_bus_pm_poweroff_noirq NULL #define ibmebus_bus_pm_restore_noirq NULL -#endif /* !CONFIG_HIBERNATION */ +#endif /* !CONFIG_HIBERNATE_CALLBACKS */ static struct dev_pm_ops ibmebus_bus_dev_pm_ops = { .prepare = ibmebus_bus_pm_prepare, -- cgit v1.2.2 From 07d9fce24d871785dbd25458469032fea73f17b8 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Wed, 6 Apr 2011 12:56:29 +0530 Subject: powerpc: Check device status before adding serial device serial port nodes with the property status="disabled" are not usable and so avoid adding "disabled" port with the system. Signed-off-by: Prabhakar Kushwaha Signed-off-by: Kumar Gala --- arch/powerpc/kernel/legacy_serial.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index c834757bebc0..2b97b80d6d7d 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -330,9 +330,11 @@ void __init find_legacy_serial_ports(void) if (!parent) continue; if (of_match_node(legacy_serial_parents, parent) != NULL) { - index = add_legacy_soc_port(np, np); - if (index >= 0 && np == stdout) - legacy_serial_console = index; + if (of_device_is_available(np)) { + index = add_legacy_soc_port(np, np); + if (index >= 0 && np == stdout) + legacy_serial_console = index; + } } of_node_put(parent); } -- cgit v1.2.2 From 11ed0db9f6c7811233632d2ab79c50c011b89902 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 6 Apr 2011 00:11:06 -0500 Subject: powerpc/book3e: Fix CPU feature handling on 64-bit e5500 The CPU_FTRS_POSSIBLE and CPU_FTRS_ALWAYS defines did not encompass e5500 CPU features when built for 64-bit. This causes issues with cpu_has_feature() as it utilizes the POSSIBLE & ALWAYS defines as part of its check. Create a unique CPU_FTRS_E5500 (as its different from CPU_FTRS_E500MC), created a new group for 64-bit Book3e based CPUs and add CPU_FTRS_E5500 to that group. Signed-off-by: Kumar Gala --- arch/powerpc/kernel/cputable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c9b68d07ac4f..b9602ee06deb 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1973,7 +1973,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_mask = 0xffff0000, .pvr_value = 0x80240000, .cpu_name = "e5500", - .cpu_features = CPU_FTRS_E500MC, + .cpu_features = CPU_FTRS_E5500, .cpu_user_features = COMMON_USER_BOOKE, .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX, -- cgit v1.2.2 From 184748cc50b2dceb8287f9fb657eda48ff8fcfe7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:39 +0200 Subject: sched: Provide scheduler_ipi() callback in response to smp_send_reschedule() For future rework of try_to_wake_up() we'd like to push part of that function onto the CPU the task is actually going to run on. In order to do so we need a generic callback from the existing scheduler IPI. This patch introduces such a generic callback: scheduler_ipi() and implements it as a NOP. BenH notes: PowerPC might use this IPI on offline CPUs under rare conditions! Acked-by: Russell King Acked-by: Martin Schwidefsky Acked-by: Chris Metcalf Acked-by: Jesper Nilsson Acked-by: Benjamin Herrenschmidt Signed-off-by: Ralf Baechle Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152728.744338123@chello.nl --- arch/powerpc/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index cbdbb14be4b0..9f9c204bef69 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -116,7 +116,7 @@ void smp_message_recv(int msg) generic_smp_call_function_interrupt(); break; case PPC_MSG_RESCHEDULE: - /* we notice need_resched on exit */ + scheduler_ipi(); break; case PPC_MSG_CALL_FUNC_SINGLE: generic_smp_call_function_single_interrupt(); @@ -146,7 +146,7 @@ static irqreturn_t call_function_action(int irq, void *data) static irqreturn_t reschedule_action(int irq, void *data) { - /* we just need the return path side effect of checking need_resched */ + scheduler_ipi(); return IRQ_HANDLED; } -- cgit v1.2.2 From 7c7a81b53e581d727d069cc45df5510516faac31 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 13 Apr 2011 06:30:08 +0000 Subject: powerpc/kexec: Fix regression causing compile failure on UP Recent commit b987812b3fcaf70fdf0037589e5d2f5f2453e6ce caused a compile failure on UP because a considerably large block of the file was included within CONFIG_SMP, hence making a stub function not exposed on UP builds when it needed to be. Relocate the stub to the #else /* ! CONFIG_SMP */ section and also annotate the relevant else/endif so that nobody else falls into the same trap I did. Reported-by: Michael Guntsche Signed-off-by: Paul Gortmaker Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 3d3d416339dd..5b5e1f002a8e 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -163,7 +163,7 @@ static void crash_kexec_prepare_cpus(int cpu) } /* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP) +#ifdef CONFIG_PPC_STD_MMU_64 static void crash_kexec_wait_realmode(int cpu) { unsigned int msecs; @@ -188,9 +188,7 @@ static void crash_kexec_wait_realmode(int cpu) } mb(); } -#else -static inline void crash_kexec_wait_realmode(int cpu) {} -#endif +#endif /* CONFIG_PPC_STD_MMU_64 */ /* * This function will be called by secondary cpus or by kexec cpu @@ -235,7 +233,9 @@ void crash_kexec_secondary(struct pt_regs *regs) crash_ipi_callback(regs); } -#else +#else /* ! CONFIG_SMP */ +static inline void crash_kexec_wait_realmode(int cpu) {} + static void crash_kexec_prepare_cpus(int cpu) { /* @@ -255,7 +255,7 @@ void crash_kexec_secondary(struct pt_regs *regs) { cpus_in_sr = CPU_MASK_NONE; } -#endif +#endif /* CONFIG_SMP */ /* * Register a function to be called on shutdown. Only use this if you -- cgit v1.2.2 From 84ffae55af79d7b8834fd0c08d0d1ebf2c77f91e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Apr 2011 21:44:21 +0000 Subject: powerpc: Fix oops if scan_dispatch_log is called too early We currently enable interrupts before the dispatch log for the boot cpu is setup. If a timer interrupt comes in early enough we oops in scan_dispatch_log: Unable to handle kernel paging request for data at address 0x00000010 ... .scan_dispatch_log+0xb0/0x170 .account_system_vtime+0xa0/0x220 .irq_enter+0x88/0xc0 .do_IRQ+0x48/0x230 The patch below adds a check to scan_dispatch_log to ensure the dispatch log has been allocated. Signed-off-by: Anton Blanchard Cc: Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 375480c56eb9..f33acfd872ad 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -229,6 +229,9 @@ static u64 scan_dispatch_log(u64 stop_tb) u64 stolen = 0; u64 dtb; + if (!dtl) + return 0; + if (i == vpa->dtl_idx) return 0; while (i < vpa->dtl_idx) { -- cgit v1.2.2 From 86c74ab317c1ef4d37325e0d7ca8a01a796b0bd7 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 15 Apr 2011 08:12:30 +0000 Subject: powerpc/perf_event: Skip updating kernel counters if register value shrinks Because of speculative event roll back, it is possible for some event coutners to decrease between reads on POWER7. This causes a problem with the way that counters are updated. Delta calues are calculated in a 64 bit value and the top 32 bits are masked. If the register value has decreased, this leaves us with a very large positive value added to the kernel counters. This patch protects against this by skipping the update if the delta would be negative. This can lead to a lack of precision in the coutner values, but from my testing the value is typcially fewer than 10 samples at a time. Signed-off-by: Eric B Munson Cc: stable@kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/perf_event.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index c4063b7f49a0..822f63008ae1 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -398,6 +398,25 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], return 0; } +static u64 check_and_compute_delta(u64 prev, u64 val) +{ + u64 delta = (val - prev) & 0xfffffffful; + + /* + * POWER7 can roll back counter values, if the new value is smaller + * than the previous value it will cause the delta and the counter to + * have bogus values unless we rolled a counter over. If a coutner is + * rolled back, it will be smaller, but within 256, which is the maximum + * number of events to rollback at once. If we dectect a rollback + * return 0. This can lead to a small lack of precision in the + * counters. + */ + if (prev > val && (prev - val) < 256) + delta = 0; + + return delta; +} + static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; @@ -416,10 +435,11 @@ static void power_pmu_read(struct perf_event *event) prev = local64_read(&event->hw.prev_count); barrier(); val = read_pmc(event->hw.idx); + delta = check_and_compute_delta(prev, val); + if (!delta) + return; } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); - /* The counters are only 32 bits wide */ - delta = (val - prev) & 0xfffffffful; local64_add(delta, &event->count); local64_sub(delta, &event->hw.period_left); } @@ -449,8 +469,9 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw, val = (event->hw.idx == 5) ? pmc5 : pmc6; prev = local64_read(&event->hw.prev_count); event->hw.idx = 0; - delta = (val - prev) & 0xfffffffful; - local64_add(delta, &event->count); + delta = check_and_compute_delta(prev, val); + if (delta) + local64_add(delta, &event->count); } } @@ -458,14 +479,16 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw, unsigned long pmc5, unsigned long pmc6) { struct perf_event *event; - u64 val; + u64 val, prev; int i; for (i = 0; i < cpuhw->n_limited; ++i) { event = cpuhw->limited_counter[i]; event->hw.idx = cpuhw->limited_hwidx[i]; val = (event->hw.idx == 5) ? pmc5 : pmc6; - local64_set(&event->hw.prev_count, val); + prev = local64_read(&event->hw.prev_count); + if (check_and_compute_delta(prev, val)) + local64_set(&event->hw.prev_count, val); perf_event_update_userpage(event); } } @@ -1197,7 +1220,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); - delta = (val - prev) & 0xfffffffful; + delta = check_and_compute_delta(prev, val); local64_add(delta, &event->count); /* -- cgit v1.2.2 From 24cc67de62eebbda3ce0c46bdd56582c00dccd03 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 20 Jan 2011 18:50:55 +1100 Subject: powerpc: Define CPU feature for Architected 2.06 HV mode This bit indicates that we are operating in hypervisor mode on a CPU compliant to architecture 2.06 or later (currently server only). We set it on POWER7 and have a boot-time CPU setup function that clears it if MSR:HV isn't set (booting under a hypervisor). Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/cpu_setup_power7.S | 65 ++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/cputable.c | 6 ++++ 3 files changed, 72 insertions(+) create mode 100644 arch/powerpc/kernel/cpu_setup_power7.S (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 3bb2a3e6a337..7c6eb4974f25 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ paca.o nvram_64.o firmware.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power7.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S new file mode 100644 index 000000000000..f2b317817c4e --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -0,0 +1,65 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +/* Entry: r3 = crap, r4 = ptr to cputable entry + * + * Note that we can be called twice for pseudo-PVRs + */ +_GLOBAL(__setup_cpu_power7) + mflr r11 + bl __init_hvmode_206 + mtlr r11 + beqlr + bl __init_LPCR + mtlr r11 + blr + +_GLOBAL(__restore_cpu_power7) + mflr r11 + mfmsr r3 + rldicl. r0,r3,4,63 + beqlr + bl __init_LPCR + mtlr r11 + blr + +__init_hvmode_206: + /* Disable CPU_FTR_HVMODE_206 and exit if MSR:HV is not set */ + mfmsr r3 + rldicl. r0,r3,4,63 + bnelr + ld r5,CPU_SPEC_FEATURES(r4) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE_206) + xor r5,r5,r6 + std r5,CPU_SPEC_FEATURES(r4) + blr + +__init_LPCR: + /* Setup a sane LPCR: + * + * LPES = 0b11 (SRR0/1 used for 0x500) + * PECE = 0b111 + * + * Other bits untouched for now + */ + mfspr r3,SPRN_LPCR + ori r3,r3,(LPCR_LPES0|LPCR_LPES1) + ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) + mtspr SPRN_LPCR,r3 + isync + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b9602ee06deb..b65b4908d3c7 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -423,6 +423,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .oprofile_type = PPC_OPROFILE_POWER4, .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, .platform = "power7", }, { /* Power7 */ @@ -439,6 +441,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power7", .oprofile_type = PPC_OPROFILE_POWER4, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, .platform = "power7", }, { /* Power7+ */ @@ -455,6 +459,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power7", .oprofile_type = PPC_OPROFILE_POWER4, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, .platform = "power7+", }, { /* Cell Broadband Engine */ -- cgit v1.2.2 From 2dd60d79e0202628a47af9812a84d502cc63628c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 20 Jan 2011 17:50:21 +1100 Subject: powerpc: In HV mode, use HSPRG0 for PACA When running in Hypervisor mode (arch 2.06 or later), we store the PACA in HSPRG0 instead of SPRG1. The architecture specifies that SPRGs may be lost during a "nap" power management operation (though they aren't currently on POWER7) and this enables use of SPRG1 by KVM guests. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/entry_64.S | 4 ++-- arch/powerpc/kernel/exceptions-64s.S | 8 ++++---- arch/powerpc/kernel/head_64.S | 4 ++-- arch/powerpc/kernel/paca.c | 13 ++++++++++++- 4 files changed, 20 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d82878c4daa6..dbf5bfafd7bc 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -838,7 +838,7 @@ _GLOBAL(enter_rtas) _STATIC(rtas_return_loc) /* relocation is off at this point */ - mfspr r4,SPRN_SPRG_PACA /* Get PACA */ + GET_PACA(r4) clrldi r4,r4,2 /* convert to realmode address */ bcl 20,31,$+4 @@ -869,7 +869,7 @@ _STATIC(rtas_restore_regs) REST_8GPRS(14, r1) /* Restore the non-volatiles */ REST_10GPRS(22, r1) /* ditto */ - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) ld r4,_CCR(r1) mtcr r4 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index aeb739e18769..6784bf7090f6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -53,7 +53,7 @@ data_access_pSeries: DO_KVM 0x300 mtspr SPRN_SPRG_SCRATCH0,r13 BEGIN_FTR_SECTION - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) std r9,PACA_EXSLB+EX_R9(r13) std r10,PACA_EXSLB+EX_R10(r13) mfspr r10,SPRN_DAR @@ -82,7 +82,7 @@ data_access_slb_pSeries: HMT_MEDIUM DO_KVM 0x380 mtspr SPRN_SPRG_SCRATCH0,r13 - mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ + GET_PACA(r13) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ @@ -121,7 +121,7 @@ instruction_access_slb_pSeries: HMT_MEDIUM DO_KVM 0x480 mtspr SPRN_SPRG_SCRATCH0,r13 - mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ + GET_PACA(r13) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ @@ -165,7 +165,7 @@ BEGIN_FTR_SECTION beq- 1f END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) mr r9,r13 - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) mfspr r11,SPRN_SRR0 ld r12,PACAKBASE(r13) ld r10,PACAKMSR(r13) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3a319f9c9d3e..39a40400f3f2 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -228,7 +228,7 @@ generic_secondary_common_init: mr r3,r24 /* not found, copy phys to r3 */ b .kexec_wait /* next kernel might do better */ -2: mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG */ +2: SET_PACA(r13) #ifdef CONFIG_PPC_BOOK3E addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */ mtspr SPRN_SPRG_TLB_EXFRAME,r12 @@ -534,7 +534,7 @@ _GLOBAL(pmac_secondary_start) ld r4,0(r4) /* Get base vaddr of paca array */ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r4 /* for this processor. */ - mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/ + SET_PACA(r13) /* Save vaddr of paca in an SPRG*/ /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 10f0aadee95b..102244edecf0 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -156,11 +156,22 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) /* Put the paca pointer into r13 and SPRG_PACA */ void setup_paca(struct paca_struct *new_paca) { + /* Setup r13 */ local_paca = new_paca; - mtspr(SPRN_SPRG_PACA, local_paca); + #ifdef CONFIG_PPC_BOOK3E + /* On Book3E, initialize the TLB miss exception frames */ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); +#else + /* In HV mode, we setup both HPACA and PACA to avoid problems + * if we do a GET_PACA() before the feature fixups have been + * applied + */ + if (cpu_has_feature(CPU_FTR_HVMODE_206)) + mtspr(SPRN_SPRG_HPACA, local_paca); #endif + mtspr(SPRN_SPRG_PACA, local_paca); + } static int __initdata paca_size; -- cgit v1.2.2 From a5d4f3ad3a28cf046836b9bfae61d532b8f77036 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Apr 2011 14:20:31 +1000 Subject: powerpc: Base support for exceptions using HSRR0/1 Pass the register type to the prolog, also provides alternate "HV" version of hardware interrupt (0x500) and adjust LPES accordingly We tag those interrupts by setting bit 0x2 in the trap number Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cpu_setup_power7.S | 3 ++- arch/powerpc/kernel/exceptions-64s.S | 48 ++++++++++++++++++++++++++-------- 2 files changed, 39 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S index f2b317817c4e..e801ef15d6d0 100644 --- a/arch/powerpc/kernel/cpu_setup_power7.S +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -52,13 +52,14 @@ __init_hvmode_206: __init_LPCR: /* Setup a sane LPCR: * - * LPES = 0b11 (SRR0/1 used for 0x500) + * LPES = 0b01 (HSRR0/1 used for 0x500) * PECE = 0b111 * * Other bits untouched for now */ mfspr r3,SPRN_LPCR ori r3,r3,(LPCR_LPES0|LPCR_LPES1) + xori r3,r3, LPCR_LPES0 ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) mtspr SPRN_LPCR,r3 isync diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6784bf7090f6..17f1d6670635 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -44,7 +44,7 @@ _machine_check_pSeries: HMT_MEDIUM DO_KVM 0x200 mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) . = 0x300 .globl data_access_pSeries @@ -71,9 +71,9 @@ BEGIN_FTR_SECTION std r10,PACA_EXGEN+EX_R10(r13) std r11,PACA_EXGEN+EX_R9(r13) std r12,PACA_EXGEN+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(data_access_common) + EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD) FTR_SECTION_ELSE - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD) ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB) . = 0x380 @@ -147,11 +147,24 @@ instruction_access_slb_pSeries: bctr #endif - MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt) + . = 0x500; + .globl hardware_interrupt_pSeries +hardware_interrupt_pSeries: + BEGIN_FTR_SECTION + MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD) + FTR_SECTION_ELSE + MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV) + ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206) + STD_EXCEPTION_PSERIES(0x600, alignment) STD_EXCEPTION_PSERIES(0x700, program_check) STD_EXCEPTION_PSERIES(0x800, fp_unavailable) - MASKABLE_EXCEPTION_PSERIES(0x900, decrementer) + + . = 0x900; + .globl decrementer_pSeries +decrementer_pSeries: + MASKABLE_EXCEPTION_PSERIES(0x900, decrementer, EXC_STD) + STD_EXCEPTION_PSERIES(0xa00, trap_0a) STD_EXCEPTION_PSERIES(0xb00, trap_0b) @@ -207,15 +220,15 @@ vsx_unavailable_pSeries_1: b vsx_unavailable_pSeries #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error) + HSTD_EXCEPTION_PSERIES(0x1202, cbe_system_error) #endif /* CONFIG_CBE_RAS */ STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance) + HSTD_EXCEPTION_PSERIES(0x1602, cbe_maintenance) #endif /* CONFIG_CBE_RAS */ STD_EXCEPTION_PSERIES(0x1700, altivec_assist) #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal) + HSTD_EXCEPTION_PSERIES(0x1802, cbe_thermal) #endif /* CONFIG_CBE_RAS */ . = 0x3000 @@ -244,13 +257,26 @@ masked_interrupt: rfid b . +masked_Hinterrupt: + stb r10,PACAHARDIRQEN(r13) + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + mfspr r10,SPRN_HSRR1 + rldicl r10,r10,48,1 /* clear MSR_EE */ + rotldi r10,r10,16 + mtspr SPRN_HSRR1,r10 + ld r10,PACA_EXGEN+EX_R10(r13) + mfspr r13,SPRN_SPRG_HSCRATCH0 + hrfid + b . + .align 7 do_stab_bolted_pSeries: std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) mfspr r10,SPRN_SPRG_SCRATCH0 std r10,PACA_EXSLB+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted) + EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) #ifdef CONFIG_PPC_PSERIES /* @@ -261,14 +287,14 @@ do_stab_bolted_pSeries: system_reset_fwnmi: HMT_MEDIUM mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) .globl machine_check_fwnmi .align 7 machine_check_fwnmi: HMT_MEDIUM mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) #endif /* CONFIG_PPC_PSERIES */ -- cgit v1.2.2 From b3e6b5dfcf0974069a8ddcce7dd071120d20d79c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Apr 2011 14:27:11 +1000 Subject: powerpc: More work to support HV exceptions Rework exception macros a bit to split offset from vector and add some basic support for HDEC, HDSI, HISI and a few more. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 92 +++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 17f1d6670635..805e20657868 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -37,7 +37,7 @@ .globl __start_interrupts __start_interrupts: - STD_EXCEPTION_PSERIES(0x100, system_reset) + STD_EXCEPTION_PSERIES(0x100, 0x100, system_reset) . = 0x200 _machine_check_pSeries: @@ -113,7 +113,7 @@ data_access_slb_pSeries: bctr #endif - STD_EXCEPTION_PSERIES(0x400, instruction_access) + STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access) . = 0x480 .globl instruction_access_slb_pSeries @@ -147,26 +147,29 @@ instruction_access_slb_pSeries: bctr #endif + /* We open code these as we can't have a ". = x" (even with + * x = "." within a feature section + */ . = 0x500; - .globl hardware_interrupt_pSeries + .globl hardware_interrupt_pSeries; + .globl hardware_interrupt_hv; hardware_interrupt_pSeries: +hardware_interrupt_hv: BEGIN_FTR_SECTION - MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD) + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD) FTR_SECTION_ELSE - MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV) + _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV) ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206) - STD_EXCEPTION_PSERIES(0x600, alignment) - STD_EXCEPTION_PSERIES(0x700, program_check) - STD_EXCEPTION_PSERIES(0x800, fp_unavailable) + STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) + STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) + STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) - . = 0x900; - .globl decrementer_pSeries -decrementer_pSeries: - MASKABLE_EXCEPTION_PSERIES(0x900, decrementer, EXC_STD) + MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer) + MASKABLE_EXCEPTION_HV(0x980, 0x980, decrementer) - STD_EXCEPTION_PSERIES(0xa00, trap_0a) - STD_EXCEPTION_PSERIES(0xb00, trap_0b) + STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a) + STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) . = 0xc00 .globl system_call_pSeries @@ -196,8 +199,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) rfid /* return to userspace */ b . - STD_EXCEPTION_PSERIES(0xd00, single_step) - STD_EXCEPTION_PSERIES(0xe00, trap_0e) + STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) + + /* At 0xe??? we have a bunch of hypervisor exceptions, we branch + * out of line to handle them + */ + . = 0xe00 + b h_data_storage_hv + . = 0xe20 + b h_instr_storage_hv + . = 0xe40 + b emulation_assist_hv + . = 0xe50 + b hmi_exception_hv + . = 0xe60 + b hmi_exception_hv /* We need to deal with the Altivec unavailable exception * here which is at 0xf20, thus in the middle of the @@ -206,39 +222,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) */ performance_monitor_pSeries_1: . = 0xf00 - DO_KVM 0xf00 b performance_monitor_pSeries altivec_unavailable_pSeries_1: . = 0xf20 - DO_KVM 0xf20 b altivec_unavailable_pSeries vsx_unavailable_pSeries_1: . = 0xf40 - DO_KVM 0xf40 b vsx_unavailable_pSeries #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1202, cbe_system_error) + STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) + STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1602, cbe_maintenance) + STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1700, altivec_assist) + STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1802, cbe_thermal) + STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) #endif /* CONFIG_CBE_RAS */ . = 0x3000 -/*** pSeries interrupt support ***/ +/*** Out of line interrupts support ***/ + + /* moved from 0xe00 */ + STD_EXCEPTION_HV(., 0xe00, h_data_storage) + STD_EXCEPTION_HV(., 0xe20, h_instr_storage) + STD_EXCEPTION_HV(., 0xe40, emulation_assist) + STD_EXCEPTION_HV(., 0xe60, hmi_exception) /* need to flush cache ? */ /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES(., performance_monitor) - STD_EXCEPTION_PSERIES(., altivec_unavailable) - STD_EXCEPTION_PSERIES(., vsx_unavailable) + STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor) + STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) + STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -368,6 +387,8 @@ machine_check_common: STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) + STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) + STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) #ifdef CONFIG_ALTIVEC @@ -445,6 +466,19 @@ data_access_common: li r5,0x300 b .do_hash_page /* Try to handle as hpte fault */ + .align 7 + .globl h_data_storage_common +h_data_storage_common: + mfspr r10,SPRN_HDAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_HDSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unknown_exception + b .ret_from_except + .align 7 .globl instruction_access_common instruction_access_common: @@ -454,6 +488,8 @@ instruction_access_common: li r5,0x400 b .do_hash_page /* Try to handle as hpte fault */ + STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception) + /* * Here is the common SLB miss user that is used when going to virtual * mode for SLB misses, that is currently not used -- cgit v1.2.2 From 673b189a2e3353061fa8c49515d1014dab6ad9b9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 5 Apr 2011 13:59:58 +1000 Subject: powerpc: Always use SPRN_SPRG_HSCRATCH0 when running in HV mode This uses feature sections to arrange that we always use HSPRG1 as the scratch register in the interrupt entry code rather than SPRG2 when we're running in hypervisor mode on POWER7. This will ensure that we don't trash the guest's SPRG2 when we are running KVM guests. To simplify the code, we define GET_SCRATCH0() and SET_SCRATCH0() macros like the GET_PACA/SET_PACA macros. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 805e20657868..e513c1d35b2a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -43,7 +43,7 @@ __start_interrupts: _machine_check_pSeries: HMT_MEDIUM DO_KVM 0x200 - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ + SET_SCRATCH0(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) . = 0x300 @@ -51,7 +51,7 @@ _machine_check_pSeries: data_access_pSeries: HMT_MEDIUM DO_KVM 0x300 - mtspr SPRN_SPRG_SCRATCH0,r13 + SET_SCRATCH0(r13) BEGIN_FTR_SECTION GET_PACA(r13) std r9,PACA_EXSLB+EX_R9(r13) @@ -67,7 +67,7 @@ BEGIN_FTR_SECTION std r11,PACA_EXGEN+EX_R11(r13) ld r11,PACA_EXSLB+EX_R9(r13) std r12,PACA_EXGEN+EX_R12(r13) - mfspr r12,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r12) std r10,PACA_EXGEN+EX_R10(r13) std r11,PACA_EXGEN+EX_R9(r13) std r12,PACA_EXGEN+EX_R13(r13) @@ -81,7 +81,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB) data_access_slb_pSeries: HMT_MEDIUM DO_KVM 0x380 - mtspr SPRN_SPRG_SCRATCH0,r13 + SET_SCRATCH0(r13) GET_PACA(r13) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR @@ -95,7 +95,7 @@ data_access_slb_pSeries: std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ #ifndef CONFIG_RELOCATABLE @@ -120,7 +120,7 @@ data_access_slb_pSeries: instruction_access_slb_pSeries: HMT_MEDIUM DO_KVM 0x480 - mtspr SPRN_SPRG_SCRATCH0,r13 + SET_SCRATCH0(r13) GET_PACA(r13) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ @@ -134,7 +134,7 @@ instruction_access_slb_pSeries: std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ #ifndef CONFIG_RELOCATABLE @@ -272,7 +272,7 @@ masked_interrupt: rotldi r10,r10,16 mtspr SPRN_SRR1,r10 ld r10,PACA_EXGEN+EX_R10(r13) - mfspr r13,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r13) rfid b . @@ -285,7 +285,7 @@ masked_Hinterrupt: rotldi r10,r10,16 mtspr SPRN_HSRR1,r10 ld r10,PACA_EXGEN+EX_R10(r13) - mfspr r13,SPRN_SPRG_HSCRATCH0 + GET_SCRATCH0(r13) hrfid b . @@ -293,7 +293,7 @@ masked_Hinterrupt: do_stab_bolted_pSeries: std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) @@ -305,14 +305,14 @@ do_stab_bolted_pSeries: .align 7 system_reset_fwnmi: HMT_MEDIUM - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ + SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) .globl machine_check_fwnmi .align 7 machine_check_fwnmi: HMT_MEDIUM - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ + SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) #endif /* CONFIG_PPC_PSERIES */ @@ -327,7 +327,7 @@ slb_miss_user_pseries: std r10,PACA_EXGEN+EX_R10(r13) std r11,PACA_EXGEN+EX_R11(r13) std r12,PACA_EXGEN+EX_R12(r13) - mfspr r10,SPRG_SCRATCH0 + GET_SCRATCH0(r10) ld r11,PACA_EXSLB+EX_R9(r13) ld r12,PACA_EXSLB+EX_R3(r13) std r10,PACA_EXGEN+EX_R13(r13) -- cgit v1.2.2 From 895796a8ab548fe03b6fea410dcb1b86e1913708 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jan 2011 13:25:55 +1100 Subject: powerpc: Initialize LPCR:DPFD on power7 to a sane default This sets the default data stream prefetch size for operating systems that don't set their own value in DSCR. We use 4 which is "medium". Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cpu_setup_power7.S | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S index e801ef15d6d0..2390f6f7c478 100644 --- a/arch/powerpc/kernel/cpu_setup_power7.S +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -54,6 +54,7 @@ __init_LPCR: * * LPES = 0b01 (HSRR0/1 used for 0x500) * PECE = 0b111 + * DPFD = 4 * * Other bits untouched for now */ @@ -61,6 +62,12 @@ __init_LPCR: ori r3,r3,(LPCR_LPES0|LPCR_LPES1) xori r3,r3, LPCR_LPES0 ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) + li r5,7 + sldi r5,r5,LPCR_DPFD_SH + andc r3,r3,r5 + li r5,4 + sldi r5,r5,LPCR_DPFD_SH + or r3,r3,r5 mtspr SPRN_LPCR,r3 isync blr -- cgit v1.2.2 From b144871cb5f2c268e94258ae8f1ec810db2e1120 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 1 Mar 2011 15:46:09 +1100 Subject: powerpc: Initialize TLB and LPID register on HV mode Power7 In case entry from the bootloader isn't "clean" Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cpu_setup_power7.S | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S index 2390f6f7c478..4f9a93fcfe07 100644 --- a/arch/powerpc/kernel/cpu_setup_power7.S +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -25,7 +25,10 @@ _GLOBAL(__setup_cpu_power7) bl __init_hvmode_206 mtlr r11 beqlr + li r0,0 + mtspr SPRN_LPID,r0 bl __init_LPCR + bl __init_TLB mtlr r11 blr @@ -34,7 +37,10 @@ _GLOBAL(__restore_cpu_power7) mfmsr r3 rldicl. r0,r3,4,63 beqlr + li r0,0 + mtspr SPRN_LPID,r0 bl __init_LPCR + bl __init_TLB mtlr r11 blr @@ -71,3 +77,15 @@ __init_LPCR: mtspr SPRN_LPCR,r3 isync blr + +__init_TLB: + /* Clear the TLB */ + li r6,128 + mtctr r6 + li r7,0xc00 /* IS field = 0b11 */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: blr -- cgit v1.2.2 From ad0693ee722b93b63a89c845e99513f242e43aa6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 1 Feb 2011 12:13:09 +1100 Subject: powerpc: Call CPU ->restore callback earlier on secondary CPUs We do it before we loop on the PACA start flag. This way, we get a chance to set critical SPRs on all CPUs before Linux tries to start them up, which avoids problems when changing some bits such as LPCR bits that need to be identical on all threads of a core or similar things like that. Ideally, some of that should also be done before the MMU is enabled, but that's a separate issue which would require moving some of the SMP startup code earlier, let's not get there for now, it works with that change alone. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 39a40400f3f2..95944278380c 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -236,17 +236,6 @@ generic_secondary_common_init: /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 -3: HMT_LOW - lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ - /* start. */ - -#ifndef CONFIG_SMP - b 3b /* Never go on non-SMP */ -#else - cmpwi 0,r23,0 - beq 3b /* Loop until told to go */ - - sync /* order paca.run and cur_cpu_spec */ /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) @@ -258,6 +247,17 @@ generic_secondary_common_init: mtctr r23 bctrl +3: HMT_LOW + lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ + /* start. */ +#ifndef CONFIG_SMP + b 3b /* Never go on non-SMP */ +#else + cmpwi 0,r23,0 + beq 3b /* Loop until told to go */ + + sync /* order paca.run and cur_cpu_spec */ + 4: /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) subi r1,r1,STACK_FRAME_OVERHEAD -- cgit v1.2.2 From 9d07bc841c9779b4d7902e417f4e509996ce805d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 16 Mar 2011 14:54:35 +1100 Subject: powerpc: Properly handshake CPUs going out of boot spin loop We need to wait a bit for them to have done their CPU setup or we might end up with translation and EE on with different LPCR values between threads Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 18 +++++++++++++----- arch/powerpc/kernel/prom.c | 27 ++++++++++----------------- arch/powerpc/kernel/setup_32.c | 1 + arch/powerpc/kernel/setup_64.c | 13 ++++++++++++- 4 files changed, 36 insertions(+), 23 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 95944278380c..0700e1135c91 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -242,23 +242,31 @@ generic_secondary_common_init: ld r23,0(r23) ld r23,CPU_SPEC_RESTORE(r23) cmpdi 0,r23,0 - beq 4f + beq 3f ld r23,0(r23) mtctr r23 bctrl -3: HMT_LOW +3: LOAD_REG_ADDR(r3, boot_cpu_count) /* Decrement boot_cpu_count */ + lwarx r4,0,r3 + subi r4,r4,1 + stwcx. r4,0,r3 + bne 3b + isync + +4: HMT_LOW lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ /* start. */ #ifndef CONFIG_SMP - b 3b /* Never go on non-SMP */ + b 4b /* Never go on non-SMP */ #else cmpwi 0,r23,0 - beq 3b /* Loop until told to go */ + beq 4b /* Loop until told to go */ sync /* order paca.run and cur_cpu_spec */ + isync /* In case code patching happened */ -4: /* Create a temp kernel stack for use before relocation is on. */ + /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) subi r1,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index e74fa12afc82..c391dc4c8bad 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -268,13 +268,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - static int logical_cpuid = 0; char *type = of_get_flat_dt_prop(node, "device_type", NULL); const u32 *prop; const u32 *intserv; int i, nthreads; unsigned long len; - int found = 0; + int found = -1; /* We are scanning "cpu" nodes only */ if (type == NULL || strcmp(type, "cpu") != 0) @@ -299,11 +298,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * booted proc. */ if (initial_boot_params && initial_boot_params->version >= 2) { - if (intserv[i] == - initial_boot_params->boot_cpuid_phys) { - found = 1; - break; - } + if (intserv[i] == initial_boot_params->boot_cpuid_phys) + found = boot_cpu_count; } else { /* * Check if it's the boot-cpu, set it's hw index now, @@ -311,23 +307,20 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * off secondary threads. */ if (of_get_flat_dt_prop(node, - "linux,boot-cpu", NULL) != NULL) { - found = 1; - break; - } + "linux,boot-cpu", NULL) != NULL) + found = boot_cpu_count; } - #ifdef CONFIG_SMP /* logical cpu id is always 0 on UP kernels */ - logical_cpuid++; + boot_cpu_count++; #endif } - if (found) { - DBG("boot cpu: logical %d physical %d\n", logical_cpuid, + if (found >= 0) { + DBG("boot cpu: logical %d physical %d\n", found, intserv[i]); - boot_cpuid = logical_cpuid; - set_hard_smp_processor_id(boot_cpuid, intserv[i]); + boot_cpuid = found; + set_hard_smp_processor_id(found, intserv[i]); /* * PAPR defines "logical" PVR values for cpus that diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 1d2fbc905303..620d792b52e4 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -48,6 +48,7 @@ extern void bootx_init(unsigned long r4, unsigned long phys); int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); +int __initdata boot_cpu_count; int boot_cpuid_phys; int smp_hw_index[NR_CPUS]; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5a0401fcaebd..91a5cc5f0d02 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -72,6 +72,7 @@ #endif int boot_cpuid = 0; +int __initdata boot_cpu_count; u64 ppc64_pft_size; /* Pick defaults since we might want to patch instructions @@ -233,6 +234,7 @@ void early_setup_secondary(void) void smp_release_cpus(void) { unsigned long *ptr; + int i; DBG(" -> smp_release_cpus()\n"); @@ -245,7 +247,16 @@ void smp_release_cpus(void) ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop - PHYSICAL_START); *ptr = __pa(generic_secondary_smp_init); - mb(); + + /* And wait a bit for them to catch up */ + for (i = 0; i < 100000; i++) { + mb(); + HMT_low(); + if (boot_cpu_count == 0) + break; + udelay(1); + } + DBG("boot_cpu_count = %d\n", boot_cpu_count); DBG(" <- smp_release_cpus()\n"); } -- cgit v1.2.2 From 948cf67c4726cca2fc57533dccadfb54d890689d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jan 2011 18:42:41 +1100 Subject: powerpc: Add NAP mode support on Power7 in HV mode Wakeup comes from the system reset handler with a potential loss of the non-hypervisor CPU state. We save the non-volatile state on the stack and a pointer to it in the PACA, which the system reset handler uses to restore things Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/exceptions-64s.S | 30 ++++++++++- arch/powerpc/kernel/idle_power7.S | 97 ++++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/idle_power7.S (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 7c6eb4974f25..0fd6273bb8a9 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o +obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o obj-$(CONFIG_PPC_CLOCK) += clock.o procfs-y := proc_powerpc.o diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e513c1d35b2a..ad06333631ac 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -37,7 +37,35 @@ .globl __start_interrupts __start_interrupts: - STD_EXCEPTION_PSERIES(0x100, 0x100, system_reset) + .globl system_reset_pSeries; +system_reset_pSeries: + HMT_MEDIUM; + DO_KVM 0x100; + SET_SCRATCH0(r13) +#ifdef CONFIG_PPC_P7_NAP +BEGIN_FTR_SECTION + /* Running native on arch 2.06 or later, check if we are + * waking up from nap. We only handle no state loss and + * supervisor state loss. We do -not- handle hypervisor + * state loss at this time. + */ + mfspr r13,SPRN_SRR1 + rlwinm r13,r13,47-31,30,31 + cmpwi cr0,r13,1 + bne 1f + b .power7_wakeup_noloss +1: cmpwi cr0,r13,2 + bne 1f + b .power7_wakeup_loss + /* Total loss of HV state is fatal, we could try to use the + * PIR to locate a PACA, then use an emergency stack etc... + * but for now, let's just stay stuck here + */ +1: cmpwi cr0,r13,3 + beq . +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) +#endif /* CONFIG_PPC_P7_NAP */ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) . = 0x200 _machine_check_pSeries: diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S new file mode 100644 index 000000000000..f8f0bc7f1d4f --- /dev/null +++ b/arch/powerpc/kernel/idle_power7.S @@ -0,0 +1,97 @@ +/* + * This file contains the power_save function for 970-family CPUs. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + + .text + +_GLOBAL(power7_idle) + /* Now check if user or arch enabled NAP mode */ + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) + cmpwi 0,r4,0 + beqlr + + /* NAP is a state loss, we create a regs frame on the + * stack, fill it up with the state we care about and + * stick a pointer to it in PACAR1. We really only + * need to save PC, some CR bits and the NV GPRs, + * but for now an interrupt frame will do. + */ + mflr r0 + std r0,16(r1) + stdu r1,-INT_FRAME_SIZE(r1) + std r0,_LINK(r1) + std r0,_NIP(r1) + +#ifndef CONFIG_SMP + /* Make sure FPU, VSX etc... are flushed as we may lose + * state when going to nap mode + */ + bl .discard_lazy_cpu_state +#endif /* CONFIG_SMP */ + + /* Hard disable interrupts */ + mfmsr r9 + rldicl r9,r9,48,1 + rotldi r9,r9,16 + mtmsrd r9,1 /* hard-disable interrupts */ + li r0,0 + stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ + stb r0,PACAHARDIRQEN(r13) + + /* Continue saving state */ + SAVE_GPR(2, r1) + SAVE_NVGPRS(r1) + mfcr r3 + std r3,_CCR(r1) + std r9,_MSR(r1) + std r1,PACAR1(r13) + + /* Magic NAP mode enter sequence */ + std r0,0(r1) + ptesync + ld r0,0(r1) +1: cmp cr0,r0,r0 + bne 1b + PPC_NAP + b . + +_GLOBAL(power7_wakeup_loss) + GET_PACA(r13) + ld r1,PACAR1(r13) + REST_NVGPRS(r1) + REST_GPR(2, r1) + ld r3,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r3 + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid + +_GLOBAL(power7_wakeup_noloss) + GET_PACA(r13) + ld r1,PACAR1(r13) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid -- cgit v1.2.2 From dd797738643cd3c2dd9cdff7e4c3a04d318ab23a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Apr 2011 14:34:58 +1000 Subject: powerpc: Perform an isync to synchronize CPUs coming out of secondary_hold We need to do that to guarantee they see any code change done by dynamic patching during boot. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 0700e1135c91..6d17c37f22a1 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -147,6 +147,8 @@ __secondary_hold: mtctr r4 mr r3,r24 li r4,0 + /* Make sure that patched code is visible */ + isync bctr #else BUG_OPCODE -- cgit v1.2.2 From af2771493a1bf79cd9a1ab4f30327c428b5bd67c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 6 Apr 2011 10:51:17 +1000 Subject: powerpc: Improve prom_printf() Adds the ability to print decimal numbers and adds some more format string variants Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 941ff4dbc567..7839bd7bfd15 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -335,6 +335,7 @@ static void __init prom_printf(const char *format, ...) const char *p, *q, *s; va_list args; unsigned long v; + long vs; struct prom_t *_prom = &RELOC(prom); va_start(args, format); @@ -368,12 +369,35 @@ static void __init prom_printf(const char *format, ...) v = va_arg(args, unsigned long); prom_print_hex(v); break; + case 'd': + ++q; + vs = va_arg(args, int); + if (vs < 0) { + prom_print(RELOC("-")); + vs = -vs; + } + prom_print_dec(vs); + break; case 'l': ++q; - if (*q == 'u') { /* '%lu' */ + if (*q == 0) + break; + else if (*q == 'x') { + ++q; + v = va_arg(args, unsigned long); + prom_print_hex(v); + } else if (*q == 'u') { /* '%lu' */ ++q; v = va_arg(args, unsigned long); prom_print_dec(v); + } else if (*q == 'd') { /* %ld */ + ++q; + vs = va_arg(args, long); + if (vs < 0) { + prom_print(RELOC("-")); + vs = -vs; + } + prom_print_dec(vs); } break; } -- cgit v1.2.2 From de300974761d92f71cb583730ac9e1d4eb1b7156 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 11 Apr 2011 21:46:19 +0000 Subject: powerpc/smp: smp_ops->kick_cpu() should be able to fail When we start a cpu we use smp_ops->kick_cpu(), which currently returns void, it should be able to fail. Convert it to return int, and update all uses. Convert all the current error cases to return -ENOENT, which is what would eventually be returned by __cpu_up() currently when it doesn't detect the cpu as coming up in time. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index cbdbb14be4b0..b6083f4f39b1 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -95,7 +95,7 @@ int smt_enabled_at_boot = 1; static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; #ifdef CONFIG_PPC64 -void __devinit smp_generic_kick_cpu(int nr) +int __devinit smp_generic_kick_cpu(int nr) { BUG_ON(nr < 0 || nr >= NR_CPUS); @@ -106,6 +106,8 @@ void __devinit smp_generic_kick_cpu(int nr) */ paca[nr].cpu_start = 1; smp_mb(); + + return 0; } #endif @@ -434,7 +436,11 @@ int __cpuinit __cpu_up(unsigned int cpu) /* wake up cpus */ DBG("smp: kicking cpu %d\n", cpu); - smp_ops->kick_cpu(cpu); + rc = smp_ops->kick_cpu(cpu); + if (rc) { + pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc); + return rc; + } /* * wait to see if the cpu made a callin (is actually up). -- cgit v1.2.2 From 07fa7a0a8a586c01a8b416358c7012dcb9dc688d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Apr 2011 17:29:36 +0200 Subject: powerpc, hw_breakpoints: Fix racy access to ptrace breakpoints While the tracer accesses ptrace breakpoints, the child task may concurrently exit due to a SIGKILL and thus release its breakpoints at the same time. We can then dereference some freed pointers. To fix this, hold a reference on the child breakpoints before manipulating them. Reported-by: Oleg Nesterov Signed-off-by: Frederic Weisbecker Acked-by: Prasad Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Will Deacon Cc: Paul Mundt Cc: v2.6.33.. Link: http://lkml.kernel.org/r/1302284067-7860-4-git-send-email-fweisbec@gmail.com --- arch/powerpc/kernel/ptrace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 55613e33e263..4edeeb325429 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1591,7 +1591,10 @@ long arch_ptrace(struct task_struct *child, long request, } case PTRACE_SET_DEBUGREG: + if (ptrace_get_breakpoints(child) < 0) + return -ESRCH; ret = ptrace_set_debugreg(child, addr, data); + ptrace_put_breakpoints(child); break; #ifdef CONFIG_PPC64 -- cgit v1.2.2 From 76b4eda866c4936af8d696f040abea56bf688e16 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 14 Apr 2011 22:32:01 +0000 Subject: powerpc: Add A2 cpu support Add the cputable entry, regs and setup & restore entries for the PowerPC A2 core. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/cpu_setup_a2.S | 114 +++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/cputable.c | 25 +++++++- 3 files changed, 138 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/kernel/cpu_setup_a2.S (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 0fd6273bb8a9..058bc8bac488 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power7.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o +obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S new file mode 100644 index 000000000000..7f818feaa7a5 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -0,0 +1,114 @@ +/* + * A2 specific assembly support code + * + * Copyright 2009 Ben Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity. + * This also prevents external LPID accesses but that isn't a problem when not a + * guest. Under PV, this setting will be ignored and MMUCR will return the right + * number of PID bits we can use. + */ +#define MMUCR1_EXTEND_PID \ + (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \ + MMUCR1_DTTID | MMUCR1_DCCD) + +/* + * Use extended PIDs if enabled. + * Don't clear the ERATs on context sync events and enable I & D LRU. + * Enable ERAT back invalidate when tlbwe overwrites an entry. + */ +#define INITIAL_MMUCR1 \ + (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \ + MMUCR1_DRRE | MMUCR1_TLBWE_BINV) + +_GLOBAL(__setup_cpu_a2) + /* Some of these are actually thread local and some are + * core local but doing it always won't hurt + */ + +#ifdef CONFIG_PPC_WSP_COPRO + /* Make sure ACOP starts out as zero */ + li r3,0 + mtspr SPRN_ACOP,r3 + + /* Enable icswx instruction */ + mfspr r3,SPRN_A2_CCR2 + ori r3,r3,A2_CCR2_ENABLE_ICSWX + mtspr SPRN_A2_CCR2,r3 + + /* Unmask all CTs in HACOP */ + li r3,-1 + mtspr SPRN_HACOP,r3 +#endif /* CONFIG_PPC_WSP_COPRO */ + + /* Enable doorbell */ + mfspr r3,SPRN_A2_CCR2 + oris r3,r3,A2_CCR2_ENABLE_PC@h + mtspr SPRN_A2_CCR2,r3 + isync + + /* Setup CCR0 to disable power saving for now as it's busted + * in the current implementations. Setup CCR1 to wake on + * interrupts normally (we write the default value but who + * knows what FW may have clobbered...) + */ + li r3,0 + mtspr SPRN_A2_CCR0, r3 + LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f) + mtspr SPRN_A2_CCR1, r3 + + /* Initialise MMUCR1 */ + lis r3,INITIAL_MMUCR1@h + ori r3,r3,INITIAL_MMUCR1@l + mtspr SPRN_MMUCR1,r3 + + /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ + LOAD_REG_IMMEDIATE(r3, 0x000a7531) + mtspr SPRN_MMUCR2,r3 + + /* Set MMUCR3 to write all thids bit to the TLB */ + LOAD_REG_IMMEDIATE(r3, 0x0000000f) + mtspr SPRN_MMUCR3,r3 + + /* Don't do ERAT stuff if running guest mode */ + mfmsr r3 + andis. r0,r3,MSR_GS@h + bne 1f + + /* Now set the I-ERAT watermark to 15 */ + lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h + mtspr SPRN_MMUCR0, r4 + li r4,A2_IERAT_SIZE-1 + PPC_ERATWE(r4,r4,3) + + /* Now set the D-ERAT watermark to 31 */ + lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h + mtspr SPRN_MMUCR0, r4 + li r4,A2_DERAT_SIZE-1 + PPC_ERATWE(r4,r4,3) + + /* And invalidate the beast just in case. That won't get rid of + * a bolted entry though it will be in LRU and so will go away eventually + * but let's not bother for now + */ + PPC_ERATILX(0,0,0) +1: + blr + +_GLOBAL(__restore_cpu_a2) + b __setup_cpu_a2 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b65b4908d3c7..3d7b65ad4962 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -62,10 +62,12 @@ extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_pa6t(void); extern void __restore_cpu_ppc970(void); extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power7(void); +extern void __restore_cpu_a2(void); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -2011,7 +2013,26 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC32 */ #endif /* CONFIG_E500 */ -#ifdef CONFIG_PPC_BOOK3E_64 +#ifdef CONFIG_PPC_A2 + { /* Standard A2 (>= DD2) + FPU core */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00480000, + .cpu_name = "A2 (>= DD2)", + .cpu_features = CPU_FTRS_A2, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | + MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL | + MMU_FTR_USE_TLBRSRV | + MMU_FTR_USE_PAIRED_MAS, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 0, + .cpu_setup = __setup_cpu_a2, + .cpu_restore = __restore_cpu_a2, + .machine_check = machine_check_generic, + .platform = "ppca2", + }, { /* This is a default entry to get going, to be replaced by * a real one at some stage */ @@ -2032,7 +2053,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "power6", }, -#endif +#endif /* CONFIG_PPC_A2 */ }; static struct cpu_spec the_cpu_spec; -- cgit v1.2.2 From ca1769f7a372898f5e3dbb8e4ff53f53f0626ef4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 14 Apr 2011 22:32:04 +0000 Subject: powerpc: Index crit/dbg/mcheck stacks using cpu number on 64bit In exc_lvl_ctx_init() we index into the crit/dbg/mcheck stacks using the hard cpu id, but that assumes the hard cpu id is zero based and contiguous. That is not the case on A2. The root of the problem is that the 32bit code has no equivalent of the paca to allow it to do the hard->soft mapping in assembler. Until the 32bit code is updated to handle that, index the stacks using the soft cpu ids on 64bit and hard on 32 bit. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f621b7d2d869..ea09512a68c3 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -397,24 +397,28 @@ struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly; void exc_lvl_ctx_init(void) { struct thread_info *tp; - int i, hw_cpu; + int i, cpu_nr; for_each_possible_cpu(i) { - hw_cpu = get_hard_smp_processor_id(i); - memset((void *)critirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = critirq_ctx[hw_cpu]; - tp->cpu = i; +#ifdef CONFIG_PPC64 + cpu_nr = i; +#else + cpu_nr = get_hard_smp_processor_id(i); +#endif + memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = critirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = 0; #ifdef CONFIG_BOOKE - memset((void *)dbgirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = dbgirq_ctx[hw_cpu]; - tp->cpu = i; + memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = dbgirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = 0; - memset((void *)mcheckirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = mcheckirq_ctx[hw_cpu]; - tp->cpu = i; + memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = mcheckirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = HARDIRQ_OFFSET; #endif } -- cgit v1.2.2 From 1a51dde139d5305b2592c716c50c005d6ab9624b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 14 Apr 2011 22:32:04 +0000 Subject: powerpc/book3e: Use way 3 for linear mapping bolted entry An erratum on A2 can lead to the bolted entry we insert for the linear mapping being evicted, to avoid that write the bolted entry to way 3. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64e.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 9651acc3504a..8fe0fc233f02 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -864,8 +864,9 @@ have_hes: * that will have to be made dependent on whether we are running under * a hypervisor I suppose. */ - ori r3,r3,MAS0_HES | MAS0_WQ_ALLWAYS - mtspr SPRN_MAS0,r3 + ori r11,r3,MAS0_WQ_ALLWAYS + oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */ + mtspr SPRN_MAS0,r11 lis r3,(MAS1_VALID | MAS1_IPROT)@h ori r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT mtspr SPRN_MAS1,r3 -- cgit v1.2.2 From f0aae3238fc1c28b543cbaaa0e7c5d57685f5f89 Mon Sep 17 00:00:00 2001 From: Jack Miller Date: Thu, 14 Apr 2011 22:32:05 +0000 Subject: powerpc/book3e: Flush IPROT protected TLB entries leftover by firmware When we set up the TLB for ourselves on Book3E, we need to flush out any old mappings established by the firmware or bootloader. At present we attempt this with a tlbilx to flush everything, but this will leave behind any entries with the IPROT bit set. There are several good reason firmware might establish mappings with IPROT, and in fact ePAPR compliant firmwares are required to establish their initial mapped area with IPROT. This patch, therefore adds more complex code to scan through the TLB upon entry and flush away any entries that are not our own. Signed-off-by: Jack Miller Signed-off-by: David Gibson Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64e.S | 45 +++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 8fe0fc233f02..23bd83b20be4 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -886,8 +886,51 @@ have_hes: bctr 1: /* We are now running at PAGE_OFFSET, clean the TLB of everything - * else (XXX we should scan for bolted crap from the firmware too) + * else (including IPROTed things left by firmware) + * r4 = TLBnCFG + * r3 = current address (more or less) */ + + li r5,0 + mtspr SPRN_MAS6,r5 + tlbsx 0,r3 + + rlwinm r9,r4,0,TLBnCFG_N_ENTRY + rlwinm r10,r4,8,0xff + addi r10,r10,-1 /* Get inner loop mask */ + + li r3,1 + + mfspr r5,SPRN_MAS1 + rlwinm r5,r5,0,(~(MAS1_VALID|MAS1_IPROT)) + + mfspr r6,SPRN_MAS2 + rldicr r6,r6,0,51 /* Extract EPN */ + + mfspr r7,SPRN_MAS0 + rlwinm r7,r7,0,0xffff0fff /* Clear HES and WQ */ + + rlwinm r8,r7,16,0xfff /* Extract ESEL */ + +2: add r4,r3,r8 + and r4,r4,r10 + + rlwimi r7,r4,16,MAS0_ESEL_MASK + + mtspr SPRN_MAS0,r7 + mtspr SPRN_MAS1,r5 + mtspr SPRN_MAS2,r6 + tlbwe + + addi r3,r3,1 + and. r4,r3,r10 + + bne 3f + addis r6,r6,(1<<30)@h +3: + cmpw r3,r9 + blt 2b + PPC_TLBILX(0,0,0) sync isync -- cgit v1.2.2 From efcac6589a277c10060e4be44b9455cf43838dc1 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 2 Mar 2011 15:18:48 +0000 Subject: powerpc: Per process DSCR + some fixes (try#4) The DSCR (aka Data Stream Control Register) is supported on some server PowerPC chips and allow some control over the prefetch of data streams. This patch allows the value to be specified per thread by emulating the corresponding mfspr and mtspr instructions. Children of such threads inherit the value. Other threads use a default value that can be specified in sysfs - /sys/devices/system/cpu/dscr_default. If a thread starts with non default value in the sysfs entry, all children threads inherit this non default value even if the sysfs value is changed later. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/entry_64.S | 15 +++++++++++++++ arch/powerpc/kernel/process.c | 16 ++++++++++++++++ arch/powerpc/kernel/sysfs.c | 38 ++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/traps.c | 24 ++++++++++++++++++++++++ 5 files changed, 94 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 23e6a93145ab..6887661ac072 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -74,6 +74,7 @@ int main(void) DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); + DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index dbf5bfafd7bc..64693706ebfd 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -421,6 +421,12 @@ BEGIN_FTR_SECTION std r24,THREAD_VRSAVE(r3) END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + mfspr r25,SPRN_DSCR + std r25,THREAD_DSCR(r3) +END_FTR_SECTION_IFSET(CPU_FTR_DSCR) +#endif and. r0,r0,r22 beq+ 1f andc r22,r22,r0 @@ -522,6 +528,15 @@ BEGIN_FTR_SECTION mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + ld r0,THREAD_DSCR(r4) + cmpd r0,r25 + beq 1f + mtspr SPRN_DSCR,r0 +1: +END_FTR_SECTION_IFSET(CPU_FTR_DSCR) +#endif /* r3-r13 are destroyed -- Cort */ REST_8GPRS(14, r1) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f74f355a9617..a01c2d93fd2f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -702,6 +702,8 @@ void prepare_to_copy(struct task_struct *tsk) /* * Copy a thread.. */ +extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */ + int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) @@ -769,6 +771,20 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.ksp_vsid = sp_vsid; } #endif /* CONFIG_PPC_STD_MMU_64 */ +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_DSCR)) { + if (current->thread.dscr_inherit) { + p->thread.dscr_inherit = 1; + p->thread.dscr = current->thread.dscr; + } else if (0 != dscr_default) { + p->thread.dscr_inherit = 1; + p->thread.dscr = dscr_default; + } else { + p->thread.dscr_inherit = 0; + p->thread.dscr = 0; + } + } +#endif /* * The PPC64 ABI makes use of a TOC to contain function diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c0d8c2006bf4..f0f2199e64e1 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -182,6 +182,41 @@ static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL); static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr); static SYSDEV_ATTR(purr, 0600, show_purr, store_purr); + +unsigned long dscr_default = 0; +EXPORT_SYMBOL(dscr_default); + +static ssize_t show_dscr_default(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *buf) +{ + return sprintf(buf, "%lx\n", dscr_default); +} + +static ssize_t __used store_dscr_default(struct sysdev_class *class, + struct sysdev_class_attribute *attr, const char *buf, + size_t count) +{ + unsigned long val; + int ret = 0; + + ret = sscanf(buf, "%lx", &val); + if (ret != 1) + return -EINVAL; + dscr_default = val; + + return count; +} + +static SYSDEV_CLASS_ATTR(dscr_default, 0600, + show_dscr_default, store_dscr_default); + +static void sysfs_create_dscr_default(void) +{ + int err = 0; + if (cpu_has_feature(CPU_FTR_DSCR)) + err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, + &attr_dscr_default.attr); +} #endif /* CONFIG_PPC64 */ #ifdef HAS_PPC_PMC_PA6T @@ -617,6 +652,9 @@ static int __init topology_init(void) if (cpu_online(cpu)) register_cpu_online(cpu); } +#ifdef CONFIG_PPC64 + sysfs_create_dscr_default(); +#endif /* CONFIG_PPC64 */ return 0; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5ddb801bc154..cb71cf29edea 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -909,6 +909,26 @@ static int emulate_instruction(struct pt_regs *regs) return emulate_isel(regs, instword); } +#ifdef CONFIG_PPC64 + /* Emulate the mfspr rD, DSCR. */ + if (((instword & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) && + cpu_has_feature(CPU_FTR_DSCR)) { + PPC_WARN_EMULATED(mfdscr, regs); + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_DSCR); + return 0; + } + /* Emulate the mtspr DSCR, rD. */ + if (((instword & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) && + cpu_has_feature(CPU_FTR_DSCR)) { + PPC_WARN_EMULATED(mtdscr, regs); + rd = (instword >> 21) & 0x1f; + mtspr(SPRN_DSCR, regs->gpr[rd]); + current->thread.dscr_inherit = 1; + return 0; + } +#endif + return -EINVAL; } @@ -1506,6 +1526,10 @@ struct ppc_emulated ppc_emulated = { #ifdef CONFIG_VSX WARN_EMULATED_SETUP(vsx), #endif +#ifdef CONFIG_PPC64 + WARN_EMULATED_SETUP(mfdscr), + WARN_EMULATED_SETUP(mtdscr), +#endif }; u32 ppc_warn_emulated; -- cgit v1.2.2 From 3cc30d0726d258ac336283bcde66a8ab58283b61 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 11 Apr 2011 21:25:01 +0000 Subject: powerpc/pci: Move IO workarounds to the common kernel dir Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/Makefile | 2 + arch/powerpc/kernel/io-workarounds.c | 184 +++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 arch/powerpc/kernel/io-workarounds.c (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 058bc8bac488..82e0bed0650d 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -106,6 +106,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o +obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o + obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c new file mode 100644 index 000000000000..7e5845798788 --- /dev/null +++ b/arch/powerpc/kernel/io-workarounds.c @@ -0,0 +1,184 @@ +/* + * Support PCI IO workaround + * + * Copyright (C) 2006 Benjamin Herrenschmidt + * IBM, Corp. + * (C) Copyright 2007-2008 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#undef DEBUG + +#include + +#include +#include +#include +#include +#include + +#define IOWA_MAX_BUS 8 + +static struct iowa_bus iowa_busses[IOWA_MAX_BUS]; +static unsigned int iowa_bus_count; + +static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) +{ + int i, j; + struct resource *res; + unsigned long vstart, vend; + + for (i = 0; i < iowa_bus_count; i++) { + struct iowa_bus *bus = &iowa_busses[i]; + struct pci_controller *phb = bus->phb; + + if (vaddr) { + vstart = (unsigned long)phb->io_base_virt; + vend = vstart + phb->pci_io_size - 1; + if ((vaddr >= vstart) && (vaddr <= vend)) + return bus; + } + + if (paddr) + for (j = 0; j < 3; j++) { + res = &phb->mem_resources[j]; + if (paddr >= res->start && paddr <= res->end) + return bus; + } + } + + return NULL; +} + +struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) +{ + struct iowa_bus *bus; + int token; + + token = PCI_GET_ADDR_TOKEN(addr); + + if (token && token <= iowa_bus_count) + bus = &iowa_busses[token - 1]; + else { + unsigned long vaddr, paddr; + pte_t *ptep; + + vaddr = (unsigned long)PCI_FIX_ADDR(addr); + if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) + return NULL; + + ptep = find_linux_pte(init_mm.pgd, vaddr); + if (ptep == NULL) + paddr = 0; + else + paddr = pte_pfn(*ptep) << PAGE_SHIFT; + bus = iowa_pci_find(vaddr, paddr); + + if (bus == NULL) + return NULL; + } + + return bus; +} + +struct iowa_bus *iowa_pio_find_bus(unsigned long port) +{ + unsigned long vaddr = (unsigned long)pci_io_base + port; + return iowa_pci_find(vaddr, 0); +} + + +#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ +static ret iowa_##name at \ +{ \ + struct iowa_bus *bus; \ + bus = iowa_##space##_find_bus(aa); \ + if (bus && bus->ops && bus->ops->name) \ + return bus->ops->name al; \ + return __do_##name al; \ +} + +#define DEF_PCI_AC_NORET(name, at, al, space, aa) \ +static void iowa_##name at \ +{ \ + struct iowa_bus *bus; \ + bus = iowa_##space##_find_bus(aa); \ + if (bus && bus->ops && bus->ops->name) { \ + bus->ops->name al; \ + return; \ + } \ + __do_##name al; \ +} + +#include + +#undef DEF_PCI_AC_RET +#undef DEF_PCI_AC_NORET + +static const struct ppc_pci_io __devinitconst iowa_pci_io = { + +#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name, +#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name, + +#include + +#undef DEF_PCI_AC_RET +#undef DEF_PCI_AC_NORET + +}; + +static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, + unsigned long flags, void *caller) +{ + struct iowa_bus *bus; + void __iomem *res = __ioremap_caller(addr, size, flags, caller); + int busno; + + bus = iowa_pci_find(0, (unsigned long)addr); + if (bus != NULL) { + busno = bus - iowa_busses; + PCI_SET_ADDR_TOKEN(res, busno + 1); + } + return res; +} + +/* Regist new bus to support workaround */ +void __devinit iowa_register_bus(struct pci_controller *phb, + struct ppc_pci_io *ops, + int (*initfunc)(struct iowa_bus *, void *), void *data) +{ + struct iowa_bus *bus; + struct device_node *np = phb->dn; + + if (iowa_bus_count >= IOWA_MAX_BUS) { + pr_err("IOWA:Too many pci bridges, " + "workarounds disabled for %s\n", np->full_name); + return; + } + + bus = &iowa_busses[iowa_bus_count]; + bus->phb = phb; + bus->ops = ops; + + if (initfunc) + if ((*initfunc)(bus, data)) + return; + + iowa_bus_count++; + + pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name); +} + +/* enable IO workaround */ +void __devinit io_workaround_init(void) +{ + static int io_workaround_inited; + + if (io_workaround_inited) + return; + ppc_pci_io = iowa_pci_io; + ppc_md.ioremap = iowa_ioremap; + io_workaround_inited = 1; +} -- cgit v1.2.2 From d1109b7529f362c06c47140ae09dbd2b853ffddc Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 11 Apr 2011 21:25:02 +0000 Subject: powerpc/pci: Make IO workarounds init implicit when first bus is registered Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/io-workarounds.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 7e5845798788..d36515eaa314 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -144,7 +144,19 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, return res; } -/* Regist new bus to support workaround */ +/* Enable IO workaround */ +static void __devinit io_workaround_init(void) +{ + static int io_workaround_inited; + + if (io_workaround_inited) + return; + ppc_pci_io = iowa_pci_io; + ppc_md.ioremap = iowa_ioremap; + io_workaround_inited = 1; +} + +/* Register new bus to support workaround */ void __devinit iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops, int (*initfunc)(struct iowa_bus *, void *), void *data) @@ -152,6 +164,8 @@ void __devinit iowa_register_bus(struct pci_controller *phb, struct iowa_bus *bus; struct device_node *np = phb->dn; + io_workaround_init(); + if (iowa_bus_count >= IOWA_MAX_BUS) { pr_err("IOWA:Too many pci bridges, " "workarounds disabled for %s\n", np->full_name); @@ -171,14 +185,3 @@ void __devinit iowa_register_bus(struct pci_controller *phb, pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name); } -/* enable IO workaround */ -void __devinit io_workaround_init(void) -{ - static int io_workaround_inited; - - if (io_workaround_inited) - return; - ppc_pci_io = iowa_pci_io; - ppc_md.ioremap = iowa_ioremap; - io_workaround_inited = 1; -} -- cgit v1.2.2 From 69b123684b50040b0926eed1e02795dac8cb9587 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 11 Apr 2011 21:25:02 +0000 Subject: powerpc/pci: Properly initialize IO workaround "private" Even when no initfunc is provided. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/io-workarounds.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index d36515eaa314..ffafaea3d261 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -175,6 +175,7 @@ void __devinit iowa_register_bus(struct pci_controller *phb, bus = &iowa_busses[iowa_bus_count]; bus->phb = phb; bus->ops = ops; + bus->private = data; if (initfunc) if ((*initfunc)(bus, data)) -- cgit v1.2.2 From 73706c3283d755d3725c6a48a18e677a15ced8be Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 10 Apr 2011 20:26:15 +0000 Subject: powerpc/irq: Dump chip data pointer in virq_mapping This can be useful for differentiating interrupts on the same host but with different chip data. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ea09512a68c3..4f5d6e751a65 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -1086,10 +1086,11 @@ static int virq_debug_show(struct seq_file *m, void *private) struct irq_desc *desc; const char *p; static const char none[] = "none"; + void *data; int i; - seq_printf(m, "%-5s %-7s %-15s %s\n", "virq", "hwirq", - "chip name", "host name"); + seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq", + "chip name", "chip data", "host name"); for (i = 1; i < nr_irqs; i++) { desc = irq_to_desc(i); @@ -1111,6 +1112,9 @@ static int virq_debug_show(struct seq_file *m, void *private) p = none; seq_printf(m, "%-15s ", p); + data = irq_desc_get_chip_data(desc); + seq_printf(m, "0x%16p ", data); + if (irq_map[i].host && irq_map[i].host->of_node) p = irq_map[i].host->of_node->full_name; else -- cgit v1.2.2 From 9f0b079320ad1cc71ad7ea4e0ed0b64cd72bbd6d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Apr 2011 21:56:03 +0000 Subject: powerpc: Use MSR_64BIT in places Use the new MSR_64BIT in a few places. Some of these are already ifdef'ed for BOOKE vs BOOKS, but it's still clearer, MSR_SF does not immediately parse as "MSR bit for 64bit". Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 2 +- arch/powerpc/kernel/signal_64.c | 4 ++-- arch/powerpc/kernel/traps.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 6d17c37f22a1..73d6e9afcdf1 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -655,7 +655,7 @@ _GLOBAL(enable_64b_mode) oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ mtmsr r11 #else /* CONFIG_PPC_BOOK3E */ - li r12,(MSR_SF | MSR_ISF)@highest + li r12,(MSR_64BIT | MSR_ISF)@highest sldi r12,r12,48 or r11,r11,r12 mtmsrd r11 diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 27c4a4584f80..da989fff19cc 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -381,7 +381,7 @@ badframe: regs, uc, &uc->uc_mcontext); #endif if (show_unhandled_signals && printk_ratelimit()) - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "rt_sigreturn", (long)uc, regs->nip, regs->link); @@ -469,7 +469,7 @@ badframe: regs, frame, newsp); #endif if (show_unhandled_signals && printk_ratelimit()) - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "setup_rt_frame", (long)frame, regs->nip, regs->link); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index cb71cf29edea..4a6a109b6816 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -199,7 +199,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) } else if (show_unhandled_signals && unhandled_signal(current, signr) && printk_ratelimit()) { - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, signr, addr, regs->nip, regs->link, code); } -- cgit v1.2.2 From eca590f402332ab873d13f2d8d00fa0b91cfff36 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Apr 2011 01:54:07 +0000 Subject: powerpc/rtas: Only sleep in rtas_busy_delay if we have useful work to do RTAS returns extended error codes as a hint of how long the OS might want to wait before retrying a call. If we have nothing else useful to do we may as well call back straight away. This was found when testing the new dynamic dma window feature. Firmware split the zeroing of the TCE table into 32k chunks but returned 9901 (which is a suggested wait of 10ms). All up this took about 10 minutes to complete since msleep is jiffies based and will round 10ms up to 20ms. With the patch below we take 3 seconds to complete the same test. The hint firmware is returning in the RTAS call should definitely be decreased, but even if we slept 1ms each iteration this would take 32s. Signed-off-by: Anton Blanchard Acked-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 2097f2b3cba8..f48446635c89 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -494,7 +494,7 @@ unsigned int rtas_busy_delay(int status) might_sleep(); ms = rtas_busy_delay_time(status); - if (ms) + if (ms && need_resched()) msleep(ms); return ms; -- cgit v1.2.2 From 44ae3ab3358e962039c36ad4ae461ae9fb29596c Mon Sep 17 00:00:00 2001 From: Matt Evans Date: Wed, 6 Apr 2011 19:48:50 +0000 Subject: powerpc: Free up some CPU feature bits by moving out MMU-related features Some of the 64bit PPC CPU features are MMU-related, so this patch moves them to MMU_FTR_ bits. All cpu_has_feature()-style tests are moved to mmu_has_feature(), and seven feature bits are freed as a result. Signed-off-by: Matt Evans Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cputable.c | 45 +++++++++++++++--------------------- arch/powerpc/kernel/entry_64.S | 8 +++---- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- arch/powerpc/kernel/process.c | 4 ++-- arch/powerpc/kernel/prom.c | 17 ++++++++------ arch/powerpc/kernel/setup_64.c | 2 +- 6 files changed, 38 insertions(+), 42 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 3d7b65ad4962..34d2722b9451 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -201,7 +201,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (gp)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER4, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -216,7 +216,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4+ (gq)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER4, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -232,7 +232,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -250,7 +250,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -286,7 +286,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -304,7 +304,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -320,7 +320,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5 (gr)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -340,7 +340,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+ (gs)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -356,7 +356,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+ (gs)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -373,7 +373,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .oprofile_cpu_type = "ppc64/ibm-compat-v1", @@ -387,7 +387,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_POWER6, .cpu_user_features = COMMON_USER_POWER6 | PPC_FEATURE_POWER6_EXT, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER6, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -406,7 +406,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER6 (architected)", .cpu_features = CPU_FTRS_POWER6, .cpu_user_features = COMMON_USER_POWER6, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER6, .icache_bsize = 128, .dcache_bsize = 128, .oprofile_cpu_type = "ppc64/ibm-compat-v1", @@ -419,8 +419,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7 (architected)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .mmu_features = MMU_FTR_HPTE_TABLE | - MMU_FTR_TLBIE_206, + .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, .oprofile_type = PPC_OPROFILE_POWER4, @@ -435,8 +434,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7 (raw)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .mmu_features = MMU_FTR_HPTE_TABLE | - MMU_FTR_TLBIE_206, + .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -453,8 +451,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7+ (raw)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .mmu_features = MMU_FTR_HPTE_TABLE | - MMU_FTR_TLBIE_206, + .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -473,7 +470,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_user_features = COMMON_USER_PPC64 | PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_SMT, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_CELL, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 4, @@ -488,7 +485,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "PA6T", .cpu_features = CPU_FTRS_PA6T, .cpu_user_features = COMMON_USER_PA6T, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PA6T, .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 6, @@ -505,7 +502,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (compatible)", .cpu_features = CPU_FTRS_COMPATIBLE, .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -2020,11 +2017,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "A2 (>= DD2)", .cpu_features = CPU_FTRS_A2, .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | - MMU_FTR_USE_TLBIVAX_BCAST | - MMU_FTR_LOCK_BCAST_INVAL | - MMU_FTR_USE_TLBRSRV | - MMU_FTR_USE_PAIRED_MAS, + .mmu_features = MMU_FTRS_A2, .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 0, diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 64693706ebfd..d834425186ae 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -468,10 +468,10 @@ BEGIN_FTR_SECTION FTR_SECTION_ELSE_NESTED(95) clrrdi r6,r8,40 /* get its 1T ESID */ clrrdi r9,r1,40 /* get current sp 1T ESID */ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_1T_SEGMENT, 95) + ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95) FTR_SECTION_ELSE b 2f -ALT_FTR_SECTION_END_IFSET(CPU_FTR_SLB) +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB) clrldi. r0,r6,2 /* is new ESID c00000000? */ cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */ cror eq,4*cr1+eq,eq @@ -485,7 +485,7 @@ BEGIN_FTR_SECTION li r9,MMU_SEGSIZE_1T /* insert B field */ oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0 -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) /* Update the last bolted SLB. No write barriers are needed * here, provided we only update the current CPU's SLB shadow @@ -497,7 +497,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */ std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */ - /* No need to check for CPU_FTR_NO_SLBIE_B here, since when + /* No need to check for MMU_FTR_NO_SLBIE_B here, since when * we have 1TB segments, the only CPUs known to have the errata * only support less than 1TB of system memory and we'll never * actually hit this code path. diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ad06333631ac..226cc8c62224 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -102,7 +102,7 @@ BEGIN_FTR_SECTION EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD) FTR_SECTION_ELSE EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD) -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB) . = 0x380 .globl data_access_slb_pSeries @@ -840,7 +840,7 @@ _STATIC(do_hash_page) BEGIN_FTR_SECTION andis. r0,r4,0x0020 /* Is it a segment table fault? */ bne- do_ste_alloc /* If so handle it */ -END_FTR_SECTION_IFCLR(CPU_FTR_SLB) +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) clrrdi r11,r1,THREAD_SHIFT lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a01c2d93fd2f..095043d79946 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -757,11 +757,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, _ALIGN_UP(sizeof(struct thread_info), 16); #ifdef CONFIG_PPC_STD_MMU_64 - if (cpu_has_feature(CPU_FTR_SLB)) { + if (mmu_has_feature(MMU_FTR_SLB)) { unsigned long sp_vsid; unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; - if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T) << SLB_VSID_SHIFT_1T; else diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index c391dc4c8bad..5f5e6aed2b70 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -123,18 +123,19 @@ static void __init move_device_tree(void) */ static struct ibm_pa_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ + unsigned long mmu_features; /* MMU_FTR_xxx bit */ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */ unsigned char pabyte; /* byte number in ibm,pa-features */ unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ } ibm_pa_features[] __initdata = { - {0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, - {0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, - {CPU_FTR_SLB, 0, 0, 2, 0}, - {CPU_FTR_CTRL, 0, 0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 1, 1, 1}, - {CPU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, + {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, + {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, + {0, MMU_FTR_SLB, 0, 0, 2, 0}, + {CPU_FTR_CTRL, 0, 0, 0, 3, 0}, + {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, + {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, + {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, }; @@ -166,9 +167,11 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs, if (bit ^ fp->invert) { cur_cpu_spec->cpu_features |= fp->cpu_features; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; + cur_cpu_spec->mmu_features |= fp->mmu_features; } else { cur_cpu_spec->cpu_features &= ~fp->cpu_features; cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; + cur_cpu_spec->mmu_features &= ~fp->mmu_features; } } } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 91a5cc5f0d02..959c63cf62e4 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -436,7 +436,7 @@ void __init setup_system(void) static u64 slb0_limit(void) { - if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) { return 1UL << SID_SHIFT_1T; } return 1UL << SID_SHIFT; -- cgit v1.2.2 From 476eb4912601a8c01e6702b9a029f476b4b131d2 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 4 May 2011 15:02:15 +1000 Subject: powerpc/irq: Stop exporting irq_map First step in eliminating irq_map[] table entirely Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4f5d6e751a65..a81dd74414bf 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -481,20 +481,42 @@ void do_softirq(void) * IRQ controller and virtual interrupts */ +/* The main irq map itself is an array of NR_IRQ entries containing the + * associate host and irq number. An entry with a host of NULL is free. + * An entry can be allocated if it's free, the allocator always then sets + * hwirq first to the host's invalid irq number and then fills ops. + */ +struct irq_map_entry { + irq_hw_number_t hwirq; + struct irq_host *host; +}; + static LIST_HEAD(irq_hosts); static DEFINE_RAW_SPINLOCK(irq_big_lock); static unsigned int revmap_trees_allocated; static DEFINE_MUTEX(revmap_trees_mutex); -struct irq_map_entry irq_map[NR_IRQS]; +static struct irq_map_entry irq_map[NR_IRQS]; static unsigned int irq_virq_count = NR_IRQS; static struct irq_host *irq_default_host; +irq_hw_number_t irqd_to_hwirq(struct irq_data *d) +{ + return irq_map[d->irq].hwirq; +} +EXPORT_SYMBOL_GPL(irqd_to_hwirq); + irq_hw_number_t virq_to_hw(unsigned int virq) { return irq_map[virq].hwirq; } EXPORT_SYMBOL_GPL(virq_to_hw); +struct irq_host *virq_to_host(unsigned int virq) +{ + return irq_map[virq].host; +} +EXPORT_SYMBOL_GPL(virq_to_host); + static int default_irq_host_match(struct irq_host *h, struct device_node *np) { return h->of_node != NULL && h->of_node == np; @@ -1103,7 +1125,7 @@ static int virq_debug_show(struct seq_file *m, void *private) struct irq_chip *chip; seq_printf(m, "%5d ", i); - seq_printf(m, "0x%05lx ", virq_to_hw(i)); + seq_printf(m, "0x%05lx ", irq_map[i].hwirq); chip = irq_desc_get_chip(desc); if (chip && chip->name) -- cgit v1.2.2 From 1977b502120d44b9b4897703adfb2e2fab346880 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 1 May 2011 19:46:44 +0000 Subject: powerpc: Save register r9-r13 values accurately on interrupt with bad stack When we take an interrupt or exception from kernel mode and the stack pointer is obviously not a kernel address (i.e. the top bit is 0), we switch to an emergency stack, save register values and panic. However, on 64-bit server machines, we don't actually save the values of r9 - r13 at the time of the interrupt, but rather values corrupted by the exception entry code for r12-r13, and nothing at all for r9-r11. This fixes it by passing a pointer to the register save area in the paca through to the bad_stack code in r3. The register values are saved in one of the paca register save areas (depending on which exception this is). Using the pointer in r3, the bad_stack code now retrieves the saved values of r9 - r13 and stores them in the exception frame on the emergency stack. This also stores the normal exception frame marker ("regshere") in the exception frame. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 226cc8c62224..27ca8b7c6002 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -461,9 +461,20 @@ bad_stack: std r12,_XER(r1) SAVE_GPR(0,r1) SAVE_GPR(2,r1) - SAVE_4GPRS(3,r1) - SAVE_2GPRS(7,r1) - SAVE_10GPRS(12,r1) + ld r10,EX_R3(r3) + std r10,GPR3(r1) + SAVE_GPR(4,r1) + SAVE_4GPRS(5,r1) + ld r9,EX_R9(r3) + ld r10,EX_R10(r3) + SAVE_2GPRS(9,r1) + ld r9,EX_R11(r3) + ld r10,EX_R12(r3) + ld r11,EX_R13(r3) + std r9,GPR11(r1) + std r10,GPR12(r1) + std r11,GPR13(r1) + SAVE_8GPRS(14,r1) SAVE_10GPRS(22,r1) lhz r12,PACA_TRAP_SAVE(r13) std r12,_TRAP(r1) @@ -472,6 +483,9 @@ bad_stack: li r12,0 std r12,0(r11) ld r2,PACATOC(r13) + ld r11,exception_marker@toc(r2) + std r12,RESULT(r1) + std r11,STACK_FRAME_OVERHEAD-16(r1) 1: addi r3,r1,STACK_FRAME_OVERHEAD bl .kernel_bad_stack b 1b -- cgit v1.2.2 From 48404f2e95ef0ffd8134d89c8abcd1a15e15f1b0 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 1 May 2011 19:48:20 +0000 Subject: powerpc: Save Come-From Address Register (CFAR) in exception frame Recent 64-bit server processors (POWER6 and POWER7) have a "Come-From Address Register" (CFAR), that records the address of the most recent branch or rfid (return from interrupt) instruction for debugging purposes. This saves the value of the CFAR in the exception entry code and stores it in the exception frame. We also make xmon print the CFAR value in its register dump code. Rather than extend the pt_regs struct at this time, we steal the orig_gpr3 field, which is only used for system calls, and use it for the CFAR value for all exceptions/interrupts other than system calls. This means we don't save the CFAR on system calls, which is not a great problem since system calls tend not to happen unexpectedly, and also avoids adding the overhead of reading the CFAR to the system call entry path. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 27ca8b7c6002..0ec3b42717d7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -474,6 +474,10 @@ bad_stack: std r9,GPR11(r1) std r10,GPR12(r1) std r11,GPR13(r1) +BEGIN_FTR_SECTION + ld r10,EX_CFAR(r3) + std r10,ORIG_GPR3(r1) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) SAVE_8GPRS(14,r1) SAVE_10GPRS(22,r1) lhz r12,PACA_TRAP_SAVE(r13) -- cgit v1.2.2 From 104699c0ab473535793b5fea156adaf309afd29b Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 28 Apr 2011 05:07:23 +0000 Subject: powerpc: Convert old cpumask API into new one Adapt new API. Almost change is trivial. Most important change is the below line because we plan to change task->cpus_allowed implementation. - ctx->cpus_allowed = current->cpus_allowed; Signed-off-by: KOSAKI Motohiro Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 32 ++++++++++++++++---------------- arch/powerpc/kernel/setup-common.c | 4 ++-- arch/powerpc/kernel/smp.c | 4 ++-- arch/powerpc/kernel/traps.c | 2 +- 4 files changed, 21 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 5b5e1f002a8e..ccc2198e6b23 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -64,9 +64,9 @@ void crash_ipi_callback(struct pt_regs *regs) return; hard_irq_disable(); - if (!cpu_isset(cpu, cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_in_crash)) crash_save_cpu(regs, cpu); - cpu_set(cpu, cpus_in_crash); + cpumask_set_cpu(cpu, &cpus_in_crash); /* * Entered via soft-reset - could be the kdump @@ -77,8 +77,8 @@ void crash_ipi_callback(struct pt_regs *regs) * Tell the kexec CPU that entered via soft-reset and ready * to go down. */ - if (cpu_isset(cpu, cpus_in_sr)) { - cpu_clear(cpu, cpus_in_sr); + if (cpumask_test_cpu(cpu, &cpus_in_sr)) { + cpumask_clear_cpu(cpu, &cpus_in_sr); atomic_inc(&enter_on_soft_reset); } @@ -87,7 +87,7 @@ void crash_ipi_callback(struct pt_regs *regs) * This barrier is needed to make sure that all CPUs are stopped. * If not, soft-reset will be invoked to bring other CPUs. */ - while (!cpu_isset(crashing_cpu, cpus_in_crash)) + while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -109,7 +109,7 @@ static void crash_soft_reset_check(int cpu) { unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - cpu_clear(cpu, cpus_in_sr); + cpumask_clear_cpu(cpu, &cpus_in_sr); while (atomic_read(&enter_on_soft_reset) != ncpus) cpu_relax(); } @@ -132,7 +132,7 @@ static void crash_kexec_prepare_cpus(int cpu) */ printk(KERN_EMERG "Sending IPI to other cpus...\n"); msecs = 10000; - while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) { + while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { cpu_relax(); mdelay(1); } @@ -144,20 +144,20 @@ static void crash_kexec_prepare_cpus(int cpu) * user to do soft reset such that we get all. * Soft-reset will be used until better mechanism is implemented. */ - if (cpus_weight(cpus_in_crash) < ncpus) { + if (cpumask_weight(&cpus_in_crash) < ncpus) { printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", - ncpus - cpus_weight(cpus_in_crash)); + ncpus - cpumask_weight(&cpus_in_crash)); printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); - cpus_in_sr = CPU_MASK_NONE; + cpumask_clear(&cpus_in_sr); atomic_set(&enter_on_soft_reset, 0); - while (cpus_weight(cpus_in_crash) < ncpus) + while (cpumask_weight(&cpus_in_crash) < ncpus) cpu_relax(); } /* * Make sure all CPUs are entered via soft-reset if the kdump is * invoked using soft-reset. */ - if (cpu_isset(cpu, cpus_in_sr)) + if (cpumask_test_cpu(cpu, &cpus_in_sr)) crash_soft_reset_check(cpu); /* Leave the IPI callback set */ } @@ -210,7 +210,7 @@ void crash_kexec_secondary(struct pt_regs *regs) * exited using 'x'(exit and recover) or * kexec_should_crash() failed for all running tasks. */ - cpu_clear(cpu, cpus_in_sr); + cpumask_clear_cpu(cpu, &cpus_in_sr); local_irq_restore(flags); return; } @@ -224,7 +224,7 @@ void crash_kexec_secondary(struct pt_regs *regs) * then start kexec boot. */ crash_soft_reset_check(cpu); - cpu_set(crashing_cpu, cpus_in_crash); + cpumask_set_cpu(crashing_cpu, &cpus_in_crash); if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); machine_kexec(kexec_crash_image); @@ -253,7 +253,7 @@ static void crash_kexec_prepare_cpus(int cpu) void crash_kexec_secondary(struct pt_regs *regs) { - cpus_in_sr = CPU_MASK_NONE; + cpumask_clear(&cpus_in_sr); } #endif /* CONFIG_SMP */ @@ -345,7 +345,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crashing_cpu = smp_processor_id(); crash_save_cpu(regs, crashing_cpu); crash_kexec_prepare_cpus(crashing_cpu); - cpu_set(crashing_cpu, cpus_in_crash); + cpumask_set_cpu(crashing_cpu, &cpus_in_crash); crash_kexec_wait_realmode(crashing_cpu); machine_kexec_mask_interrupts(); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 21f30cb68077..1475df6e403f 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -381,7 +381,7 @@ static void __init cpu_init_thread_core_maps(int tpc) int i; threads_per_core = tpc; - threads_core_mask = CPU_MASK_NONE; + cpumask_clear(&threads_core_mask); /* This implementation only supports power of 2 number of threads * for simplicity and performance @@ -390,7 +390,7 @@ static void __init cpu_init_thread_core_maps(int tpc) BUG_ON(tpc != (1 << threads_shift)); for (i = 0; i < tpc; i++) - cpu_set(i, threads_core_mask); + cpumask_set_cpu(i, &threads_core_mask); printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n", tpc, tpc > 1 ? "s" : ""); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index b6083f4f39b1..87517ab6d365 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -513,7 +513,7 @@ int cpu_first_thread_of_core(int core) } EXPORT_SYMBOL_GPL(cpu_first_thread_of_core); -/* Must be called when no change can occur to cpu_present_map, +/* Must be called when no change can occur to cpu_present_mask, * i.e. during cpu online or offline. */ static struct device_node *cpu_to_l2cache(int cpu) @@ -614,7 +614,7 @@ void __init smp_cpus_done(unsigned int max_cpus) * se we pin us down to CPU 0 for a short while */ alloc_cpumask_var(&old_mask, GFP_NOWAIT); - cpumask_copy(old_mask, ¤t->cpus_allowed); + cpumask_copy(old_mask, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 4a6a109b6816..06b9d457d0a7 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -221,7 +221,7 @@ void system_reset_exception(struct pt_regs *regs) } #ifdef CONFIG_KEXEC - cpu_set(smp_processor_id(), cpus_in_sr); + cpumask_set_cpu(smp_processor_id(), &cpus_in_sr); #endif die("System Reset", regs, SIGABRT); -- cgit v1.2.2 From 7707e4110e5692fe85e7e6c471c9bb2a9254d313 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 24 Apr 2011 15:04:31 +0000 Subject: powerpc/kexec: Fix build failure on 32-bit SMP Commit b987812b3fcaf70fdf0037589e5d2f5f2453e6ce left crash_kexec_wait_realmode() undefined for UP. Commit 7c7a81b53e581d727d069cc45df5510516faac31 defined it for UP but left it undefined for 32-bit SMP. Seems like people are getting confused by nested #ifdef's, so move the definitions of crash_kexec_wait_realmode() after the #ifdef CONFIG_SMP section. Compile-tested with 32-bit UP, 32-bit SMP and 64-bit SMP configurations. Signed-off-by: Ben Hutchings Tested-by: Paul Gortmaker Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 59 +++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 29 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index ccc2198e6b23..21f2c781ded1 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -162,34 +162,6 @@ static void crash_kexec_prepare_cpus(int cpu) /* Leave the IPI callback set */ } -/* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#ifdef CONFIG_PPC_STD_MMU_64 -static void crash_kexec_wait_realmode(int cpu) -{ - unsigned int msecs; - int i; - - msecs = 10000; - for (i=0; i < NR_CPUS && msecs > 0; i++) { - if (i == cpu) - continue; - - while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { - barrier(); - if (!cpu_possible(i)) { - break; - } - if (!cpu_online(i)) { - break; - } - msecs--; - mdelay(1); - } - } - mb(); -} -#endif /* CONFIG_PPC_STD_MMU_64 */ - /* * This function will be called by secondary cpus or by kexec cpu * if soft-reset is activated to stop some CPUs. @@ -234,7 +206,6 @@ void crash_kexec_secondary(struct pt_regs *regs) } #else /* ! CONFIG_SMP */ -static inline void crash_kexec_wait_realmode(int cpu) {} static void crash_kexec_prepare_cpus(int cpu) { @@ -257,6 +228,36 @@ void crash_kexec_secondary(struct pt_regs *regs) } #endif /* CONFIG_SMP */ +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64) +static void crash_kexec_wait_realmode(int cpu) +{ + unsigned int msecs; + int i; + + msecs = 10000; + for (i=0; i < NR_CPUS && msecs > 0; i++) { + if (i == cpu) + continue; + + while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i)) { + break; + } + if (!cpu_online(i)) { + break; + } + msecs--; + mdelay(1); + } + } + mb(); +} +#else +static inline void crash_kexec_wait_realmode(int cpu) {} +#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */ + /* * Register a function to be called on shutdown. Only use this if you * can't reset your device in the second kernel. -- cgit v1.2.2 From 9ee820fa005254dfc816330f6654f14dcb2beee1 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 4 May 2011 16:01:20 +1000 Subject: powerpc/pseries: Add page coalescing support Adds support for page coalescing, which is a feature on IBM Power servers which allows for coalescing identical pages between logical partitions. Hint text pages as coalesce candidates, since they are the most likely pages to be able to be coalesced between partitions. This patch also exports some page coalescing statistics available from firmware via lparcfg. [BenH: Moved a couple of things around to fix compile problems] Signed-off-by: Brian King Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/lparcfg.c | 53 +++++++++++++++++++---------------------- arch/powerpc/kernel/prom_init.c | 4 +++- arch/powerpc/kernel/rtas.c | 2 ++ 3 files changed, 30 insertions(+), 29 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 301db65f05a1..84daabe2fcba 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -132,34 +132,6 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v) /* * Methods used to fetch LPAR data when running on a pSeries platform. */ -/** - * h_get_mpp - * H_GET_MPP hcall returns info in 7 parms - */ -int h_get_mpp(struct hvcall_mpp_data *mpp_data) -{ - int rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; - - rc = plpar_hcall9(H_GET_MPP, retbuf); - - mpp_data->entitled_mem = retbuf[0]; - mpp_data->mapped_mem = retbuf[1]; - - mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; - mpp_data->pool_num = retbuf[2] & 0xffff; - - mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; - mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; - mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; - - mpp_data->pool_size = retbuf[4]; - mpp_data->loan_request = retbuf[5]; - mpp_data->backing_mem = retbuf[6]; - - return rc; -} -EXPORT_SYMBOL(h_get_mpp); struct hvcall_ppp_data { u64 entitlement; @@ -345,6 +317,30 @@ static void parse_mpp_data(struct seq_file *m) seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); } +/** + * parse_mpp_x_data + * Parse out data returned from h_get_mpp_x + */ +static void parse_mpp_x_data(struct seq_file *m) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (!firmware_has_feature(FW_FEATURE_XCMO)) + return; + if (h_get_mpp_x(&mpp_x_data)) + return; + + seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes); + + if (mpp_x_data.pool_coalesced_bytes) + seq_printf(m, "pool_coalesced_bytes=%ld\n", + mpp_x_data.pool_coalesced_bytes); + if (mpp_x_data.pool_purr_cycles) + seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles); + if (mpp_x_data.pool_spurr_cycles) + seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); +} + #define SPLPAR_CHARACTERISTICS_TOKEN 20 #define SPLPAR_MAXLENGTH 1026*(sizeof(char)) @@ -520,6 +516,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) parse_system_parameter_string(m); parse_ppp_data(m); parse_mpp_data(m); + parse_mpp_x_data(m); pseries_cmo_data(m); splpar_dispatch_data(m); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 7839bd7bfd15..c016033ba78d 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -700,8 +700,10 @@ static void __init early_cmdline_parse(void) #endif /* CONFIG_PCI_MSI */ #ifdef CONFIG_PPC_SMLPAR #define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */ +#define OV5_XCMO 0x40 /* Page Coalescing */ #else #define OV5_CMO 0x00 +#define OV5_XCMO 0x00 #endif #define OV5_TYPE1_AFFINITY 0x80 /* Type 1 NUMA affinity */ @@ -756,7 +758,7 @@ static unsigned char ibm_architecture_vec[] = { OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_DONATE_DEDICATE_CPU | OV5_MSI, 0, - OV5_CMO, + OV5_CMO | OV5_XCMO, OV5_TYPE1_AFFINITY, 0, 0, diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index f48446635c89..271ff6318eda 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -42,6 +42,7 @@ #include #include #include +#include struct rtas_t rtas = { .lock = __ARCH_SPIN_LOCK_UNLOCKED @@ -731,6 +732,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w atomic_set(&data->error, rc); start_topology_update(); + pSeries_coalesce_init(); if (wake_when_done) { atomic_set(&data->done, 1); -- cgit v1.2.2 From 40bd587a88fcd425f489f3d9f0be7daa84014141 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 3 May 2011 14:07:01 +0000 Subject: powerpc: Rename slb0_limit() to safe_stack_limit() and add Book3E support slb0_limit() wasn't a very descriptive name. This changes it along with a comment explaining what it's used for, and provides a 64-bit BookE implementation. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 959c63cf62e4..c2ec0a12e14f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -434,17 +434,30 @@ void __init setup_system(void) DBG(" <- setup_system()\n"); } -static u64 slb0_limit(void) +/* This returns the limit below which memory accesses to the linear + * mapping are guarnateed not to cause a TLB or SLB miss. This is + * used to allocate interrupt or emergency stacks for which our + * exception entry path doesn't deal with being interrupted. + */ +static u64 safe_stack_limit(void) { - if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) { +#ifdef CONFIG_PPC_BOOK3E + /* Freescale BookE bolts the entire linear mapping */ + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + return linear_map_top; + /* Other BookE, we assume the first GB is bolted */ + return 1ul << 30; +#else + /* BookS, the first segment is bolted */ + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) return 1UL << SID_SHIFT_1T; - } return 1UL << SID_SHIFT; +#endif } static void __init irqstack_early_init(void) { - u64 limit = slb0_limit(); + u64 limit = safe_stack_limit(); unsigned int i; /* @@ -497,7 +510,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(slb0_limit(), ppc64_rma_size); + limit = min(safe_stack_limit(), ppc64_rma_size); for_each_possible_cpu(i) { unsigned long sp; -- cgit v1.2.2 From a1d0d98daf6ce580d017a43b09fe30a375cde3e8 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 14 Apr 2011 22:32:06 +0000 Subject: powerpc: Add WSP platform Add a platform for the Wire Speed Processor, based on the PPC A2. This includes code for the ICS & OPB interrupt controllers, as well as a SCOM backend, and SCOM based cpu bringup. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: David Gibson Signed-off-by: Jack Miller Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64e.S | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 23bd83b20be4..c98e9d260621 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -864,6 +864,20 @@ have_hes: * that will have to be made dependent on whether we are running under * a hypervisor I suppose. */ + + /* BEWARE, MAGIC + * This code is called as an ordinary function on the boot CPU. But to + * avoid duplication, this code is also used in SCOM bringup of + * secondary CPUs. We read the code between the initial_tlb_code_start + * and initial_tlb_code_end labels one instruction at a time and RAM it + * into the new core via SCOM. That doesn't process branches, so there + * must be none between those two labels. It also means if this code + * ever takes any parameters, the SCOM code must also be updated to + * provide them. + */ + .globl a2_tlbinit_code_start +a2_tlbinit_code_start: + ori r11,r3,MAS0_WQ_ALLWAYS oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */ mtspr SPRN_MAS0,r11 @@ -880,6 +894,9 @@ have_hes: /* Write the TLB entry */ tlbwe + .globl a2_tlbinit_after_linear_map +a2_tlbinit_after_linear_map: + /* Now we branch the new virtual address mapped by this entry */ LOAD_REG_IMMEDIATE(r3,1f) mtctr r3 @@ -931,10 +948,16 @@ have_hes: cmpw r3,r9 blt 2b + .globl a2_tlbinit_after_iprot_flush +a2_tlbinit_after_iprot_flush: + PPC_TLBILX(0,0,0) sync isync + .globl a2_tlbinit_code_end +a2_tlbinit_code_end: + /* We translate LR and return */ mflr r3 tovirt(r3,r3) -- cgit v1.2.2 From a0496d450ab8c17f6c4d86979b1f6ba486fe9365 Mon Sep 17 00:00:00 2001 From: Jack Miller Date: Thu, 14 Apr 2011 22:32:08 +0000 Subject: powerpc: Add early debug for WSP platforms Signed-off-by: Jack Miller Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64e.S | 17 ++++++++++++ arch/powerpc/kernel/udbg.c | 2 ++ arch/powerpc/kernel/udbg_16550.c | 51 ++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index c98e9d260621..4d0abb4930a1 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -951,6 +952,22 @@ a2_tlbinit_after_linear_map: .globl a2_tlbinit_after_iprot_flush a2_tlbinit_after_iprot_flush: +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP + /* Now establish early debug mappings if applicable */ + /* Restore the MAS0 we used for linear mapping load */ + mtspr SPRN_MAS0,r11 + + lis r3,(MAS1_VALID | MAS1_IPROT)@h + ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT) + mtspr SPRN_MAS1,r3 + LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G) + mtspr SPRN_MAS2,r3 + LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW) + mtspr SPRN_MAS7_MAS3,r3 + /* re-use the MAS8 value from the linear mapping */ + tlbwe +#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ + PPC_TLBILX(0,0,0) sync isync diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index e39cad83c884..23d65abbedce 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -62,6 +62,8 @@ void __init udbg_early_init(void) udbg_init_cpm(); #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) udbg_init_usbgecko(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) + udbg_init_wsp(); #endif #ifdef CONFIG_PPC_EARLY_DEBUG diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index baa33a7517bc..6837f839ab78 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -11,6 +11,7 @@ #include #include #include +#include extern u8 real_readb(volatile u8 __iomem *addr); extern void real_writeb(u8 data, volatile u8 __iomem *addr); @@ -298,3 +299,53 @@ void __init udbg_init_40x_realmode(void) udbg_getc_poll = NULL; } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP +static void udbg_wsp_flush(void) +{ + if (udbg_comport) { + while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + } +} + +static void udbg_wsp_putc(char c) +{ + if (udbg_comport) { + if (c == '\n') + udbg_wsp_putc('\r'); + udbg_wsp_flush(); + writeb(c, &udbg_comport->thr); eieio(); + } +} + +static int udbg_wsp_getc(void) +{ + if (udbg_comport) { + while ((readb(&udbg_comport->lsr) & LSR_DR) == 0) + ; /* wait for char */ + return readb(&udbg_comport->rbr); + } + return -1; +} + +static int udbg_wsp_getc_poll(void) +{ + if (udbg_comport) + if (readb(&udbg_comport->lsr) & LSR_DR) + return readb(&udbg_comport->rbr); + return -1; +} + +void __init udbg_init_wsp(void) +{ + udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT; + + udbg_init_uart(udbg_comport, 57600, 50000000); + + udbg_putc = udbg_wsp_putc; + udbg_flush = udbg_wsp_flush; + udbg_getc = udbg_wsp_getc; + udbg_getc_poll = udbg_wsp_getc_poll; +} +#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ -- cgit v1.2.2 From 925f83c085e1bb08435556c5b4844a60de002e31 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 May 2011 01:53:18 +0200 Subject: hw_breakpoints, powerpc: Fix CONFIG_HAVE_HW_BREAKPOINT off-case in ptrace_set_debugreg() We make use of ptrace_get_breakpoints() / ptrace_put_breakpoints() to protect ptrace_set_debugreg() even if CONFIG_HAVE_HW_BREAKPOINT if off. However in this case, these APIs are not implemented. To fix this, push the protection down inside the relevant ifdef. Best would be to export the code inside CONFIG_HAVE_HW_BREAKPOINT into a standalone function to cleanup the ifdefury there and call the breakpoint ref API inside. But as it is more invasive, this should be rather made in an -rc1. Fixes this build error: arch/powerpc/kernel/ptrace.c:1594: error: implicit declaration of function 'ptrace_get_breakpoints' make[2]: *** Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: LPPC Cc: Prasad Cc: v2.6.33.. Link: http://lkml.kernel.org/r/1304639598-4707-1-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/ptrace.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 4edeeb325429..a6ae1cfad86c 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -933,12 +933,16 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, if (data && !(data & DABR_TRANSLATION)) return -EIO; #ifdef CONFIG_HAVE_HW_BREAKPOINT + if (ptrace_get_breakpoints(task) < 0) + return -ESRCH; + bp = thread->ptrace_bps[0]; if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) { if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; } + ptrace_put_breakpoints(task); return 0; } if (bp) { @@ -948,9 +952,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, (DABR_DATA_WRITE | DABR_DATA_READ), &attr.bp_type); ret = modify_user_hw_breakpoint(bp, &attr); - if (ret) + if (ret) { + ptrace_put_breakpoints(task); return ret; + } thread->ptrace_bps[0] = bp; + ptrace_put_breakpoints(task); thread->dabr = data; return 0; } @@ -965,9 +972,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ptrace_triggered, task); if (IS_ERR(bp)) { thread->ptrace_bps[0] = NULL; + ptrace_put_breakpoints(task); return PTR_ERR(bp); } + ptrace_put_breakpoints(task); + #endif /* CONFIG_HAVE_HW_BREAKPOINT */ /* Move contents to the DABR register */ @@ -1591,10 +1601,7 @@ long arch_ptrace(struct task_struct *child, long request, } case PTRACE_SET_DEBUGREG: - if (ptrace_get_breakpoints(child) < 0) - return -ESRCH; ret = ptrace_set_debugreg(child, addr, data); - ptrace_put_breakpoints(child); break; #ifdef CONFIG_PPC64 -- cgit v1.2.2 From 70f23fd66bc821a0e99647f70a809e277cc93c4c Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Tue, 10 May 2011 10:16:21 +0200 Subject: treewide: fix a few typos in comments - kenrel -> kernel - whetehr -> whether - ttt -> tt - sss -> ss Signed-off-by: Justin P. Mattock Signed-off-by: Jiri Kosina --- arch/powerpc/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 42850ee00ada..9411747bbcaf 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -142,7 +142,7 @@ static int kgdb_singlestep(struct pt_regs *regs) return 0; /* - * On Book E and perhaps other processsors, singlestep is handled on + * On Book E and perhaps other processors, singlestep is handled on * the critical exception stack. This causes current_thread_info() * to fail, since it it locates the thread_info by masking off * the low bits of the current stack pointer. We work around -- cgit v1.2.2 From 85f60ae4ee817174b0f78928bc7066f28c3551ab Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 29 Apr 2011 00:18:16 -0600 Subject: dt/flattree: explicitly pass command line pointer to early_init_dt_scan_chosen This patch drops the reference to a global 'cmd_line' variable from early_init_dt_scan_chosen, and instead passes the pointer to the command line string via the *data argument. Each architecture does something slightly different with the initial command line, so it makes sense for the architecture to be able to specify the variable name. Signed-off-by: Grant Likely --- arch/powerpc/kernel/prom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index e74fa12afc82..c173bee09cee 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -691,7 +691,7 @@ void __init early_init_devtree(void *params) * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - of_scan_flat_dt(early_init_dt_scan_chosen_ppc, NULL); + of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); /* Scan memory nodes and rebuild MEMBLOCKs */ memblock_init(); -- cgit v1.2.2 From 82a3242e11d9e63c8195be46c954efaefee35e22 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 May 2011 16:01:02 -0700 Subject: sysfs: remove "last sysfs file:" line from the oops messages On some arches (x86, sh, arm, unicore, powerpc) the oops message would print out the last sysfs file accessed. This was very useful in finding a number of sysfs and driver core bugs in the 2.5 and early 2.6 development days, but it has been a number of years since this file has actually helped in debugging anything that couldn't also be trivially determined from the stack traceback. So it's time to delete the line. This is good as we need all the space we can get for oops messages at times on consoles. Acked-by: Phil Carmody Acked-by: Ingo Molnar Cc: Andrew Morton Cc: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/traps.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5ddb801bc154..d782cd71c07c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -143,7 +143,6 @@ int die(const char *str, struct pt_regs *regs, long err) #endif printk("%s\n", ppc_md.name ? ppc_md.name : ""); - sysfs_printk_last_file(); if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) return 1; -- cgit v1.2.2 From 69e3cea8d5fd52677f2b6219542d0f8b53fe4c80 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 19 May 2011 13:07:12 +1000 Subject: powerpc/smp: Make start_secondary_resume available to all CPU variants This should fix SMP & Hotplug builds on FSL BookE and 476 Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_32.S | 9 --------- arch/powerpc/kernel/misc_32.S | 11 +++++++++++ 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index c5c24beb8387..98c4b29a56f4 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -890,15 +890,6 @@ __secondary_start: mtspr SPRN_SRR1,r4 SYNC RFI - -_GLOBAL(start_secondary_resume) - /* Reset stack */ - rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD - li r3,0 - std r3,0(r1) /* Zero the stack frame pointer */ - bl start_secondary - b . #endif /* CONFIG_SMP */ #ifdef CONFIG_KVM_BOOK3S_HANDLER diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 094bd9821ad4..402560e957bd 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -694,6 +694,17 @@ _GLOBAL(kernel_thread) addi r1,r1,16 blr +#ifdef CONFIG_SMP +_GLOBAL(start_secondary_resume) + /* Reset stack */ + rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl start_secondary + b . +#endif /* CONFIG_SMP */ + /* * This routine is just here to keep GCC happy - sigh... */ -- cgit v1.2.2 From 35d215fbe4f4d3569f2adabd8d53510a26ecb9c5 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 24 Apr 2011 15:04:31 +0000 Subject: powerpc/kexec: Fix build failure on 32-bit SMP Commit b987812b3fcaf70fdf0037589e5d2f5f2453e6ce left crash_kexec_wait_realmode() undefined for UP. Commit 7c7a81b53e581d727d069cc45df5510516faac31 defined it for UP but left it undefined for 32-bit SMP. Seems like people are getting confused by nested #ifdef's, so move the definitions of crash_kexec_wait_realmode() after the #ifdef CONFIG_SMP section. Compile-tested with 32-bit UP, 32-bit SMP and 64-bit SMP configurations. Signed-off-by: Ben Hutchings Tested-by: Paul Gortmaker Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 59 +++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 29 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 5b5e1f002a8e..5eb0ee37f6cd 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -162,34 +162,6 @@ static void crash_kexec_prepare_cpus(int cpu) /* Leave the IPI callback set */ } -/* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#ifdef CONFIG_PPC_STD_MMU_64 -static void crash_kexec_wait_realmode(int cpu) -{ - unsigned int msecs; - int i; - - msecs = 10000; - for (i=0; i < NR_CPUS && msecs > 0; i++) { - if (i == cpu) - continue; - - while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { - barrier(); - if (!cpu_possible(i)) { - break; - } - if (!cpu_online(i)) { - break; - } - msecs--; - mdelay(1); - } - } - mb(); -} -#endif /* CONFIG_PPC_STD_MMU_64 */ - /* * This function will be called by secondary cpus or by kexec cpu * if soft-reset is activated to stop some CPUs. @@ -234,7 +206,6 @@ void crash_kexec_secondary(struct pt_regs *regs) } #else /* ! CONFIG_SMP */ -static inline void crash_kexec_wait_realmode(int cpu) {} static void crash_kexec_prepare_cpus(int cpu) { @@ -257,6 +228,36 @@ void crash_kexec_secondary(struct pt_regs *regs) } #endif /* CONFIG_SMP */ +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64) +static void crash_kexec_wait_realmode(int cpu) +{ + unsigned int msecs; + int i; + + msecs = 10000; + for (i=0; i < NR_CPUS && msecs > 0; i++) { + if (i == cpu) + continue; + + while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i)) { + break; + } + if (!cpu_online(i)) { + break; + } + msecs--; + mdelay(1); + } + } + mb(); +} +#else +static inline void crash_kexec_wait_realmode(int cpu) {} +#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */ + /* * Register a function to be called on shutdown. Only use this if you * can't reset your device in the second kernel. -- cgit v1.2.2 From c560bbceaf6b06e52f1ef20131b76a3fdc0a2c19 Mon Sep 17 00:00:00 2001 From: kerstin jonsson Date: Tue, 17 May 2011 23:57:11 +0000 Subject: powerpc/4xx: Fix regression in SMP on 476 commit c56e58537d504706954a06570b4034c04e5b7500 breaks SMP support in PPC_47x chip. secondary_ti must be set to current thread info before callin kick_cpu or else start_secondary_47x will jump into void when trying to return to c-code. In the current setup secondary_ti is initialized before the CPU idle task is started and only the boot core will start. I am not sure this is the correct solution, but it makes SMP possible in my chip. Note! The HOTPLUG support probably need some fixing to, There is no trampoline code available in head_44x.S - start_secondary_resume? Signed-off-by: Kerstin Jonsson Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index cbdbb14be4b0..f2dcab7aadc8 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -410,8 +410,6 @@ int __cpuinit __cpu_up(unsigned int cpu) { int rc, c; - secondary_ti = current_set[cpu]; - if (smp_ops == NULL || (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))) return -EINVAL; @@ -421,6 +419,8 @@ int __cpuinit __cpu_up(unsigned int cpu) if (rc) return rc; + secondary_ti = current_set[cpu]; + /* Make sure callin-map entry is 0 (can be leftover a CPU * hotplug */ -- cgit v1.2.2 From 93395febdb51ed812ac1003852f537177a172aad Mon Sep 17 00:00:00 2001 From: Justin Mattock Date: Tue, 5 Apr 2011 06:58:22 +0000 Subject: powerpc: Remove unused config in the Makefile The patch below removes an unused config variable found by using a kernel cleanup script. Note: I did try to cross compile these but hit erros while doing so.. (gcc is not setup to cross compile) and am unsure if anymore needs to be done. Please have a look if/when anybody has free time. Signed-off-by: Justin P. Mattock Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 82e0bed0650d..9aab36312572 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -78,7 +78,6 @@ obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o dbell.o obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o extra-y := head_$(CONFIG_WORD_SIZE).o -extra-$(CONFIG_PPC_BOOK3E_32) := head_new_booke.o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o -- cgit v1.2.2 From d988f0e3f84cb8a4f85ccdbca6f6fefcc37bedcb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 8 May 2011 21:18:38 +0000 Subject: powerpc: Simplify 4k/64k copy_page logic To make it easier to add optimised versions of copy_page, remove the 4kB loop for 64kB pages and just do all the work in copy_page. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ppc_ksyms.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ef3ef566235e..7d28f540200c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -54,7 +54,6 @@ extern void single_step_exception(struct pt_regs *regs); extern int sys_sigreturn(struct pt_regs *regs); EXPORT_SYMBOL(clear_pages); -EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(ISA_DMA_THRESHOLD); EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); @@ -88,9 +87,7 @@ EXPORT_SYMBOL(__copy_tofrom_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__strnlen_user); -#ifdef CONFIG_PPC64 -EXPORT_SYMBOL(copy_4K_page); -#endif +EXPORT_SYMBOL(copy_page); #if defined(CONFIG_PCI) && defined(CONFIG_PPC32) EXPORT_SYMBOL(isa_io_base); -- cgit v1.2.2 From ba00ce1d6e08ad06f19f2ac53fd5c60bbe3fbeeb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 8 May 2011 21:20:19 +0000 Subject: powerpc: Remove static branch hint in giveup_altivec A static branch hint will override dynamic branch prediction on recent POWER CPUs. Since we are about to use more altivec in the kernel remove the static hint in giveup_altivec that assumes a userspace task is using altivec. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vector.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 9de6f396cf85..4d5a3edff49e 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -102,7 +102,7 @@ _GLOBAL(giveup_altivec) MTMSRD(r5) /* enable use of VMX now */ isync PPC_LCMPI 0,r3,0 - beqlr- /* if no previous owner, done */ + beqlr /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r5,0 -- cgit v1.2.2 From f5f0307f42d39a51a925ca4841f76a2f2ea330ff Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 8 May 2011 21:36:44 +0000 Subject: powerpc: Improve scheduling of system call entry instructions After looking at our system call path, Mary Brown suggested that we should put all mfspr SRR* instructions before any mtspr SRR*. To test this I used a very simple null syscall (actually getppid) testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c I tested with the following changes against the pseries_defconfig: CONFIG_VIRT_CPU_ACCOUNTING=n CONFIG_AUDIT=n to remove the overhead of virtual CPU accounting and syscall auditing. POWER6: baseline: mean = 757.2 cycles sd = 2.108 modified: mean = 759.1 cycles sd = 2.020 POWER7: baseline: mean = 411.4 cycles sd = 0.138 modified: mean = 404.1 cycles sd = 0.109 So we have 1.77% improvement on POWER7 which looks significant. The POWER6 suggest a 0.25% slowdown, but the results are within 1 standard deviation and may be in the noise. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0ec3b42717d7..a85f4874cba7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -211,11 +211,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) mr r9,r13 GET_PACA(r13) mfspr r11,SPRN_SRR0 - ld r12,PACAKBASE(r13) - ld r10,PACAKMSR(r13) - LOAD_HANDLER(r12, system_call_entry) - mtspr SPRN_SRR0,r12 mfspr r12,SPRN_SRR1 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10, system_call_entry) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 rfid b . /* prevent speculative execution */ -- cgit v1.2.2 From 3d2cea732d68aa270c360f55d8669820ebce188a Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:28:33 +0000 Subject: powerpc/kexec: Fix memory corruption from unallocated slaves Commit 1fc711f7ffb01089efc58042cfdbac8573d1b59a (powerpc/kexec: Fix race in kexec shutdown) moved the write to signal the cpu had exited the kernel from before the transition to real mode in kexec_smp_wait to kexec_wait. Unfornately it missed that kexec_wait is used both by cpus leaving the kernel and by secondary slave cpus that were not allocated a paca for what ever reason -- they could be beyond nr_cpus or not described in the current device tree for whatever reason (for example, kexec-load was not refreshed after a cpu hotplug operation). Cpus coming through that path they will write to paca[NR_CPUS] which is beyond the space allocated for the paca data and overwrite memory not allocated to pacas but very likely still real mode accessable). Move the write back to kexec_smp_wait, which is used only by cpus that found their paca, but after the transition to real mode. Signed-off-by: Milton Miller Cc: # (1fc711f was backported to 2.6.32) Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/misc_64.S | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 206a321a71d3..e89df59cdc5a 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -462,7 +462,8 @@ _GLOBAL(disable_kernel_fp) * wait for the flag to change, indicating this kernel is going away but * the slave code for the next one is at addresses 0 to 100. * - * This is used by all slaves. + * This is used by all slaves, even those that did not find a matching + * paca in the secondary startup code. * * Physical (hardware) cpu id should be in r3. */ @@ -471,10 +472,6 @@ _GLOBAL(kexec_wait) 1: mflr r5 addi r5,r5,kexec_flag-1b - li r4,KEXEC_STATE_REAL_MODE - stb r4,PACAKEXECSTATE(r13) - SYNC - 99: HMT_LOW #ifdef CONFIG_KEXEC /* use no memory without kexec */ lwz r4,0(r5) @@ -499,11 +496,17 @@ kexec_flag: * * get phys id from paca * switch to real mode + * mark the paca as no longer used * join other cpus in kexec_wait(phys_id) */ _GLOBAL(kexec_smp_wait) lhz r3,PACAHWCPUID(r13) bl real_mode + + li r4,KEXEC_STATE_REAL_MODE + stb r4,PACAKEXECSTATE(r13) + SYNC + b .kexec_wait /* -- cgit v1.2.2 From 768d18ad6d5e600d911f9499ca287d6986d8d81b Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:28:37 +0000 Subject: powerpc: Don't search for paca in freed memory Starting with 1426d5a3bd07589534286375998c0c8c6fdc5260 (powerpc: Dynamically allocate pacas) we free the memory for pacas beyond cpu_possible, but we failed to update the loop the secondary cpus use to find their paca. If the system has running cpu threads for which the kernel did not allocate a paca for they will search the memory that was freed. For instance this could happen when the device tree for a kdump kernel was not updated after a cpu hotplug, or the kernel is running with more cpus than the kernel was configured. Since c1854e00727f50f7ac99e98d26ece04c087ef785 (powerpc: Set nr_cpu_ids early and use it to free PACAs) we set nr_cpu_ids before telling the cpus to advance, so use that to limit the search. We can't reference nr_cpu_ids without CONFIG_SMP because it is defined as 1 instead of a memory location, but any extra threads should be sent to kexec_wait in that case anyways, so make that explicit and remove the search loop for UP. Note to stable: The fix also requires c1854e00727f50f7ac99e98d26ece04c087ef785 (powerpc: Set nr_cpu_ids early and use it to free PACAs) to function. Also 9d07bc841c9779b4d7902e417f4e509996ce805d (Properly handshake CPUs going out of boot spin loop) affects the second chunk, specifically the branch target was 3b before and is 4b after that patch, and there was a blank line before the #ifdef CONFIG_SMP that was removed Cc: # .34.x: c1854e0072 powerpc: Set nr_cpu_ids early Cc: # .34.x Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 73d6e9afcdf1..ba504099844a 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -218,13 +218,19 @@ generic_secondary_common_init: */ LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ ld r13,0(r13) /* Get base vaddr of paca array */ +#ifndef CONFIG_SMP + addi r13,r13,PACA_SIZE /* know r13 if used accidentally */ + b .kexec_wait /* wait for next kernel if !SMP */ +#else + LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */ + lwz r7,0(r7) /* also the max paca allocated */ li r5,0 /* logical cpu id */ 1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ beq 2f addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */ addi r5,r5,1 - cmpwi r5,NR_CPUS + cmpw r5,r7 /* Check if more pacas exist */ blt 1b mr r3,r24 /* not found, copy phys to r3 */ @@ -259,9 +265,6 @@ generic_secondary_common_init: 4: HMT_LOW lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ /* start. */ -#ifndef CONFIG_SMP - b 4b /* Never go on non-SMP */ -#else cmpwi 0,r23,0 beq 4b /* Loop until told to go */ @@ -273,7 +276,7 @@ generic_secondary_common_init: subi r1,r1,STACK_FRAME_OVERHEAD b __secondary_start -#endif +#endif /* SMP */ /* * Turn the MMU off. -- cgit v1.2.2 From bd9e5eefecb3d69018bb95796298019d309cbec8 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:28:41 +0000 Subject: powerpc/kdump64: Don't reference freed memory as pacas Starting with 1426d5a3bd07589534286375998c0c8c6fdc5260 (powerpc: Dynamically allocate pacas) the space for pacas beyond cpu_possible is freed, but we failed to update the loop in crash.c. Since c1854e00727f50f7ac99e98d26ece04c087ef785 (powerpc: Set nr_cpu_ids early and use it to free PACAs) the number of pacas allocated is always nr_cpu_ids. Signed-off-by: Milton Miller Cc: # .34.x Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 21f2c781ded1..4e6ee944495a 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -236,7 +236,7 @@ static void crash_kexec_wait_realmode(int cpu) int i; msecs = 10000; - for (i=0; i < NR_CPUS && msecs > 0; i++) { + for (i=0; i < nr_cpu_ids && msecs > 0; i++) { if (i == cpu) continue; -- cgit v1.2.2 From 8657ae28ddd34db0f52b0730a6a25992c0173264 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:28:48 +0000 Subject: powerpc: Respect nr_cpu_ids when calling set_cpu_possible and set_cpu_present We should not set cpus above nr_cpu_ids to possible. While we will trigger a warning with CONFIG_CPUMASK_DEBUG, even then the mask initializers will set the bits beyond what the iterators check and cause nr_cpu_ids to increase. Respecting nr_cpu_ids during setup will allow us to use it in our initial paca allocation. It can be reduced from NR_CPUS by the existing early param nr_cpus=, which was added in 2b633e3fac5efada088b57d31e65401f22bcc18f (smp: Use nr_cpus= to set nr_cpu_ids early). We already call parse_early_parms between finding the command line and allocating the pacas. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup-common.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1475df6e403f..fce759ba31f3 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -404,7 +404,7 @@ static void __init cpu_init_thread_core_maps(int tpc) * cpu_present_mask * * Having the possible map set up early allows us to restrict allocations - * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. + * of things like irqstacks to nr_cpu_ids rather than NR_CPUS. * * We do not initialize the online map here; cpus set their own bits in * cpu_online_mask as they come up. @@ -424,7 +424,7 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); - while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { + while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) { const int *intserv; int j, len; @@ -443,7 +443,7 @@ void __init smp_setup_cpu_maps(void) intserv = &cpu; /* assume logical == phys */ } - for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { + for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, intserv[j]); set_cpu_present(cpu, true); @@ -483,12 +483,12 @@ void __init smp_setup_cpu_maps(void) if (cpu_has_feature(CPU_FTR_SMT)) maxcpus *= nthreads; - if (maxcpus > NR_CPUS) { + if (maxcpus > nr_cpu_ids) { printk(KERN_WARNING "Partition configured for %d cpus, " "operating system maximum is %d.\n", - maxcpus, NR_CPUS); - maxcpus = NR_CPUS; + maxcpus, nr_cpu_ids); + maxcpus = nr_cpu_ids; } else printk(KERN_INFO "Partition configured for %d cpus.\n", maxcpus); -- cgit v1.2.2 From 2cd947f1757fb937806535be13caf2ddd813d60b Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:28:52 +0000 Subject: powerpc: Use nr_cpu_ids in initial paca allocation Now that we never set a cpu above nr_cpu_ids possible we can limit our initial paca allocation to nr_cpu_ids. We can then clamp the number of cpus in platforms/iseries/setup.c. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/paca.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 102244edecf0..efeb88184182 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include @@ -178,7 +178,7 @@ static int __initdata paca_size; void __init allocate_pacas(void) { - int nr_cpus, cpu, limit; + int cpu, limit; /* * We can't take SLB misses on the paca, and we want to access them @@ -190,23 +190,18 @@ void __init allocate_pacas(void) if (firmware_has_feature(FW_FEATURE_ISERIES)) limit = min(limit, HvPagesToMap * HVPAGESIZE); - nr_cpus = NR_CPUS; - /* On iSeries we know we can never have more than 64 cpus */ - if (firmware_has_feature(FW_FEATURE_ISERIES)) - nr_cpus = min(64, nr_cpus); - - paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus); + paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit)); memset(paca, 0, paca_size); printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", - paca_size, nr_cpus, paca); + paca_size, nr_cpu_ids, paca); - allocate_lppacas(nr_cpus, limit); + allocate_lppacas(nr_cpu_ids, limit); /* Can't use for_each_*_cpu, as they aren't functional yet */ - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_cpu_ids; cpu++) initialise_paca(&paca[cpu], cpu); } -- cgit v1.2.2 From aa79bc2167104581cc1d77762394f2c01d3bf3f3 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:28:55 +0000 Subject: powerpc: Call no-longer static setup_nr_cpu_ids instead of replicating it c1854e00727f50f7ac99e98d26ece04c087ef785 (powerpc: Set nr_cpu_ids early and use it to free PACAs) copied the formerly static setup_nr_cpu_ids from init/main.c but 34db18a054c600b6f81787165669dc572fe4de25 (smp: move smp setup functions to kernel/smp.c) moved it to kernel/smp.c with a declaration in include/linux/smp.h, so we can call it instead of replicating it. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index fce759ba31f3..ef33a084fcf4 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -510,7 +510,7 @@ void __init smp_setup_cpu_maps(void) cpu_init_thread_core_maps(nthreads); /* Now that possible cpus are set, set nr_cpu_ids for later use */ - nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; + setup_nr_cpu_ids(); free_unused_pacas(); } -- cgit v1.2.2 From e04763713286b1e00e1c2a33fe2741caf9470f2b Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:29:06 +0000 Subject: powerpc: Remove call sites of MSG_ALL_BUT_SELF The only user of MSG_ALL_BUT_SELF in the whole kernel tree is powerpc, and it only uses it to start the debugger. Both debuggers always call smp_send_debugger_break with MSG_ALL_BUT_SELF, and only mpic can do anything more optimal than a loop over all online cpus, but all message passing implementations have to code for this special delivery target. Convert smp_send_debugger_break to take void and loop calling the smp_ops message_pass function for each of the other cpus in the online cpumask. Use raw_smp_processor_id() because we are either entering the debugger or trying to start kdump and the additional warning it not useful were it to trigger. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/smp.c | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 42850ee00ada..bd9d35f59cf4 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -109,7 +109,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs) #ifdef CONFIG_SMP void kgdb_roundup_cpus(unsigned long flags) { - smp_send_debugger_break(MSG_ALL_BUT_SELF); + smp_send_debugger_break(); } #endif diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 87517ab6d365..b74411446922 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -218,11 +218,18 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION); } -#ifdef CONFIG_DEBUGGER -void smp_send_debugger_break(int cpu) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +void smp_send_debugger_break(void) { - if (likely(smp_ops)) - smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); + int cpu; + int me = raw_smp_processor_id(); + + if (unlikely(!smp_ops)) + return; + + for_each_online_cpu(cpu) + if (cpu != me) + smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); } #endif @@ -230,9 +237,9 @@ void smp_send_debugger_break(int cpu) void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { crash_ipi_function_ptr = crash_ipi_callback; - if (crash_ipi_callback && smp_ops) { + if (crash_ipi_callback) { mb(); - smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK); + smp_send_debugger_break(); } } #endif -- cgit v1.2.2 From f1072939b6dd01d038d47db0bdc01b33e5f90f28 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:29:10 +0000 Subject: powerpc: Remove checks for MSG_ALL and MSG_ALL_BUT_SELF Now that smp_ops->smp_message_pass is always called with an (online) cpu number for the target remove the checks for MSG_ALL and MSG_ALL_BUT_SELF. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dbell.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 3307a52d797f..e49b24c84133 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -34,32 +34,13 @@ void doorbell_setup_this_cpu(void) info->tag = mfspr(SPRN_PIR) & 0x3fff; } -void doorbell_message_pass(int target, int msg) +void doorbell_message_pass(int cpu, int msg) { struct doorbell_cpu_info *info; - int i; - - if (target < NR_CPUS) { - info = &per_cpu(doorbell_cpu_info, target); - set_bit(msg, &info->messages); - ppc_msgsnd(PPC_DBELL, 0, info->tag); - } - else if (target == MSG_ALL_BUT_SELF) { - for_each_online_cpu(i) { - if (i == smp_processor_id()) - continue; - info = &per_cpu(doorbell_cpu_info, i); - set_bit(msg, &info->messages); - ppc_msgsnd(PPC_DBELL, 0, info->tag); - } - } - else { /* target == MSG_ALL */ - for_each_online_cpu(i) { - info = &per_cpu(doorbell_cpu_info, i); - set_bit(msg, &info->messages); - } - ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0); - } + + info = &per_cpu(doorbell_cpu_info, cpu); + set_bit(msg, &info->messages); + ppc_msgsnd(PPC_DBELL, 0, info->tag); } void doorbell_exception(struct pt_regs *regs) -- cgit v1.2.2 From a56555e573d3740d588d912aada506d57759cf5d Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:29:24 +0000 Subject: powerpc: Remove alloc_maybe_bootmem for zalloc version Replace all remaining callers of alloc_maybe_bootmem with zalloc_maybe_bootmem. The callsite in pci_dn is followed with a memset to clear the memory, and not zeroing at the other callsites in the celleb fake pci code could lead to following uninitialized memory as pointers or even freeing said pointers on error paths. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci_dn.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index d225d99fe39d..6baabc13306a 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -43,10 +43,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data) const u32 *regs; struct pci_dn *pdn; - pdn = alloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); + pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); if (pdn == NULL) return NULL; - memset(pdn, 0, sizeof(*pdn)); dn->data = pdn; pdn->node = dn; pdn->phb = phb; -- cgit v1.2.2 From 23d72bfd8f9f24aa9efafed3586a99f5669c23d7 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:29:39 +0000 Subject: powerpc: Consolidate ipi message mux and demux Consolidate the mux and demux of ipi messages into smp.c and call a new smp_ops callback to actually trigger the ipi. The powerpc architecture code is optimised for having 4 distinct ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi single, scheduler ipi, and enter debugger). However, several interrupt controllers only provide a single software triggered interrupt that can be delivered to each cpu. To resolve this limitation, each smp_ops implementation created a per-cpu variable that is manipulated with atomic bitops. Since these lines will be contended they are optimialy marked as shared_aligned and take a full cache line for each cpu. Distro kernels may have 2 or 3 of these in their config, each taking per-cpu space even though at most one will be in use. This consolidation removes smp_message_recv and replaces the single call actions cases with direct calls from the common message recognition loop. The complicated debugger ipi case with its muxed crash handling code is moved to debug_ipi_action which is now called from the demux code (instead of the multi-message action calling smp_message_recv). I put a call to reschedule_action to increase the likelyhood of correctly merging the anticipated scheduler_ipi() hook coming from the scheduler tree; that single required call can be inlined later. The actual message decode is a copy of the old pseries xics code with its memory barriers and cache line spacing, augmented with a per-cpu unsigned long based on the book-e doorbell code. The optional data is set via a callback from the implementation and is passed to the new cause-ipi hook along with the logical cpu number. While currently only the doorbell implemntation uses this data it should be almost zero cost to retrieve and pass it -- it adds a single register load for the argument from the same cache line to which we just completed a store and the register is dead on return from the call. I extended the data element from unsigned int to unsigned long in case some other code wanted to associate a pointer. The doorbell check_self is replaced by a call to smp_muxed_ipi_resend, conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed to CONFIG_SMP but I left it with BOOKE for now. Also, the doorbell interrupt vector for book-e was not calling irq_enter and irq_exit, which throws off cpu accounting and causes code to not realize it is running in interrupt context. Add the missing calls. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dbell.c | 46 ++++------------------ arch/powerpc/kernel/irq.c | 4 +- arch/powerpc/kernel/smp.c | 94 ++++++++++++++++++++++++++++++--------------- 3 files changed, 73 insertions(+), 71 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index e49b24c84133..2cc451aaaca7 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -13,65 +13,35 @@ #include #include #include -#include +#include #include #include #ifdef CONFIG_SMP -struct doorbell_cpu_info { - unsigned long messages; /* current messages bits */ - unsigned int tag; /* tag value */ -}; - -static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info); - void doorbell_setup_this_cpu(void) { - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); + unsigned long tag = mfspr(SPRN_PIR) & 0x3fff; - info->messages = 0; - info->tag = mfspr(SPRN_PIR) & 0x3fff; + smp_muxed_ipi_set_data(smp_processor_id(), tag); } -void doorbell_message_pass(int cpu, int msg) +void doorbell_cause_ipi(int cpu, unsigned long data) { - struct doorbell_cpu_info *info; - - info = &per_cpu(doorbell_cpu_info, cpu); - set_bit(msg, &info->messages); - ppc_msgsnd(PPC_DBELL, 0, info->tag); + ppc_msgsnd(PPC_DBELL, 0, data); } void doorbell_exception(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); - int msg; - /* Warning: regs can be NULL when called from irq enable */ + irq_enter(); - if (!info->messages || (num_online_cpus() < 2)) - goto out; + smp_ipi_demux(); - for (msg = 0; msg < 4; msg++) - if (test_and_clear_bit(msg, &info->messages)) - smp_message_recv(msg); - -out: + irq_exit(); set_irq_regs(old_regs); } - -void doorbell_check_self(void) -{ - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); - - if (!info->messages) - return; - - ppc_msgsnd(PPC_DBELL, 0, info->tag); -} - #else /* CONFIG_SMP */ void doorbell_exception(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index a81dd74414bf..826552cecebd 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -66,7 +66,6 @@ #include #include #include -#include #include #ifdef CONFIG_PPC64 @@ -160,7 +159,8 @@ notrace void arch_local_irq_restore(unsigned long en) #if defined(CONFIG_BOOKE) && defined(CONFIG_SMP) /* Check for pending doorbell interrupts and resend to ourself */ - doorbell_check_self(); + if (cpu_has_feature(CPU_FTR_DBELL)) + smp_muxed_ipi_resend(); #endif /* diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index b74411446922..fa8e8700064b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -111,35 +111,6 @@ int __devinit smp_generic_kick_cpu(int nr) } #endif -void smp_message_recv(int msg) -{ - switch(msg) { - case PPC_MSG_CALL_FUNCTION: - generic_smp_call_function_interrupt(); - break; - case PPC_MSG_RESCHEDULE: - /* we notice need_resched on exit */ - break; - case PPC_MSG_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; - case PPC_MSG_DEBUGGER_BREAK: - if (crash_ipi_function_ptr) { - crash_ipi_function_ptr(get_irq_regs()); - break; - } -#ifdef CONFIG_DEBUGGER - debugger_ipi(get_irq_regs()); - break; -#endif /* CONFIG_DEBUGGER */ - /* FALLTHROUGH */ - default: - printk("SMP %d: smp_message_recv(): unknown msg %d\n", - smp_processor_id(), msg); - break; - } -} - static irqreturn_t call_function_action(int irq, void *data) { generic_smp_call_function_interrupt(); @@ -158,9 +129,17 @@ static irqreturn_t call_function_single_action(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t debug_ipi_action(int irq, void *data) +irqreturn_t debug_ipi_action(int irq, void *data) { - smp_message_recv(PPC_MSG_DEBUGGER_BREAK); + if (crash_ipi_function_ptr) { + crash_ipi_function_ptr(get_irq_regs()); + return IRQ_HANDLED; + } + +#ifdef CONFIG_DEBUGGER + debugger_ipi(get_irq_regs()); +#endif /* CONFIG_DEBUGGER */ + return IRQ_HANDLED; } @@ -199,6 +178,59 @@ int smp_request_message_ipi(int virq, int msg) return err; } +struct cpu_messages { + unsigned long messages; /* current messages bits */ + unsigned long data; /* data for cause ipi */ +}; +static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); + +void smp_muxed_ipi_set_data(int cpu, unsigned long data) +{ + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + + info->data = data; +} + +void smp_muxed_ipi_message_pass(int cpu, int msg) +{ + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + unsigned long *tgt = &info->messages; + + set_bit(msg, tgt); + mb(); + smp_ops->cause_ipi(cpu, info->data); +} + +void smp_muxed_ipi_resend(void) +{ + struct cpu_messages *info = &__get_cpu_var(ipi_message); + unsigned long *tgt = &info->messages; + + if (*tgt) + smp_ops->cause_ipi(smp_processor_id(), info->data); +} + +irqreturn_t smp_ipi_demux(void) +{ + struct cpu_messages *info = &__get_cpu_var(ipi_message); + unsigned long *tgt = &info->messages; + + mb(); /* order any irq clear */ + while (*tgt) { + if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) + generic_smp_call_function_interrupt(); + if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) + reschedule_action(0, NULL); /* upcoming sched hook */ + if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) + generic_smp_call_function_single_interrupt(); +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) + if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) + debug_ipi_action(0, NULL); +#endif + } + return IRQ_HANDLED; +} + void smp_send_reschedule(int cpu) { if (likely(smp_ops)) -- cgit v1.2.2 From 1ece355b6825b7c61d1dc39a5c6cf49dc746e193 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:29:42 +0000 Subject: powerpc: Add kconfig for muxed smp ipi support Compile the new smp ipi mux and demux code only if a platform will make use of it. The new config is selected as required. The new cause_ipi smp op is only available conditionally to point out configs where the select is required; this makes setting the op an immediate fail instead of a deferred unresolved symbol at link. This also creates a new config for power surge powermac upgrade support that can be disabled in expert mode but is default on. I also removed the depends / default y on CONFIG_XICS since it is selected by PSERIES. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index fa8e8700064b..d76f7d7929be 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -178,6 +178,7 @@ int smp_request_message_ipi(int virq, int msg) return err; } +#ifdef CONFIG_PPC_SMP_MUXED_IPI struct cpu_messages { unsigned long messages; /* current messages bits */ unsigned long data; /* data for cause ipi */ @@ -230,6 +231,7 @@ irqreturn_t smp_ipi_demux(void) } return IRQ_HANDLED; } +#endif /* CONFIG_PPC_SMP_MUXED_IPI */ void smp_send_reschedule(int cpu) { -- cgit v1.2.2 From 714542721b4a53a3ebbdd5f0619ac0f66e7df610 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:29:46 +0000 Subject: powerpc: Use bytes instead of bitops in smp ipi multiplexing Since there are only 4 messages, we can replace the atomic bit set (which uses atomic load reserve and store conditional sequence) with a byte stores to seperate bytes. We still have to perform a load reserve and store conditional sequence to avoid loosing messages on reception but we can do that with a single call to xchg. The do {} while and __BIG_ENDIAN specific mask testing was chosen by looking at the generated asm code. On gcc-4.4, the bit masking becomes a simple bit mask and test of the register returned from xchg without storing and loading the value to the stack like attempts with a union of bytes and an int (or worse, loading single bit constants from the constant pool into non-voliatle registers that had to be preseved on the stack). The do {} while avoids an unconditional branch to the end of the loop to test the entry / repeat condition of a while loop and instead optimises for the expected single iteration of the loop. We have a full mb() at the beginning to cover ordering between send, ipi, and receive so we can use xchg_local and forgo the further acquire and release barriers of xchg. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index d76f7d7929be..a8909aa50642 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -180,7 +180,7 @@ int smp_request_message_ipi(int virq, int msg) #ifdef CONFIG_PPC_SMP_MUXED_IPI struct cpu_messages { - unsigned long messages; /* current messages bits */ + int messages; /* current messages */ unsigned long data; /* data for cause ipi */ }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); @@ -195,9 +195,9 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data) void smp_muxed_ipi_message_pass(int cpu, int msg) { struct cpu_messages *info = &per_cpu(ipi_message, cpu); - unsigned long *tgt = &info->messages; + char *message = (char *)&info->messages; - set_bit(msg, tgt); + message[msg] = 1; mb(); smp_ops->cause_ipi(cpu, info->data); } @@ -205,30 +205,35 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) void smp_muxed_ipi_resend(void) { struct cpu_messages *info = &__get_cpu_var(ipi_message); - unsigned long *tgt = &info->messages; - if (*tgt) + if (info->messages) smp_ops->cause_ipi(smp_processor_id(), info->data); } irqreturn_t smp_ipi_demux(void) { struct cpu_messages *info = &__get_cpu_var(ipi_message); - unsigned long *tgt = &info->messages; + unsigned int all; mb(); /* order any irq clear */ - while (*tgt) { - if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) + + do { + all = xchg_local(&info->messages, 0); + +#ifdef __BIG_ENDIAN + if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION))) generic_smp_call_function_interrupt(); - if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) + if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE))) reschedule_action(0, NULL); /* upcoming sched hook */ - if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) + if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE))) generic_smp_call_function_single_interrupt(); -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) - if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) + if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK))) debug_ipi_action(0, NULL); +#else +#error Unsupported ENDIAN #endif - } + } while (info->messages); + return IRQ_HANDLED; } #endif /* CONFIG_PPC_SMP_MUXED_IPI */ -- cgit v1.2.2 From 3af259d1555a93b3b6f6545af13e0eb99b0d5d32 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:29:53 +0000 Subject: powerpc: Radix trees are available before init_IRQ Since the generic irq code uses a radix tree for sparse interrupts, the initcall ordering has been changed to initialize radix trees before irqs. We no longer need to defer creating revmap radix trees to the arch_initcall irq_late_init. Also, the kmem caches are allocated so we don't need to use zalloc_maybe_bootmem. Signed-off-by: Milton Miller Reviewed-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 78 +++-------------------------------------------- 1 file changed, 4 insertions(+), 74 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 826552cecebd..f42e869ee3cc 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -493,7 +493,6 @@ struct irq_map_entry { static LIST_HEAD(irq_hosts); static DEFINE_RAW_SPINLOCK(irq_big_lock); -static unsigned int revmap_trees_allocated; static DEFINE_MUTEX(revmap_trees_mutex); static struct irq_map_entry irq_map[NR_IRQS]; static unsigned int irq_virq_count = NR_IRQS; @@ -537,7 +536,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, /* Allocate structure and revmap table if using linear mapping */ if (revmap_type == IRQ_HOST_MAP_LINEAR) size += revmap_arg * sizeof(unsigned int); - host = zalloc_maybe_bootmem(size, GFP_KERNEL); + host = kzalloc(size, GFP_KERNEL); if (host == NULL) return NULL; @@ -605,6 +604,9 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, smp_wmb(); host->revmap_data.linear.revmap = rmap; break; + case IRQ_HOST_MAP_TREE: + INIT_RADIX_TREE(&host->revmap_data.tree, GFP_KERNEL); + break; default: break; } @@ -839,13 +841,6 @@ void irq_dispose_mapping(unsigned int virq) host->revmap_data.linear.revmap[hwirq] = NO_IRQ; break; case IRQ_HOST_MAP_TREE: - /* - * Check if radix tree allocated yet, if not then nothing to - * remove. - */ - smp_rmb(); - if (revmap_trees_allocated < 1) - break; mutex_lock(&revmap_trees_mutex); radix_tree_delete(&host->revmap_data.tree, hwirq); mutex_unlock(&revmap_trees_mutex); @@ -905,14 +900,6 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); - /* - * Check if the radix tree exists and has bee initialized. - * If not, we fallback to slow mode - */ - if (revmap_trees_allocated < 2) - return irq_find_mapping(host, hwirq); - - /* Now try to resolve */ /* * No rcu_read_lock(ing) needed, the ptr returned can't go under us * as it's referencing an entry in the static irq_map table. @@ -935,18 +922,8 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq, irq_hw_number_t hwirq) { - WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); - /* - * Check if the radix tree exists yet. - * If not, then the irq will be inserted into the tree when it gets - * initialized. - */ - smp_rmb(); - if (revmap_trees_allocated < 1) - return; - if (virq != NO_IRQ) { mutex_lock(&revmap_trees_mutex); radix_tree_insert(&host->revmap_data.tree, hwirq, @@ -1054,53 +1031,6 @@ int arch_early_irq_init(void) return 0; } -/* We need to create the radix trees late */ -static int irq_late_init(void) -{ - struct irq_host *h; - unsigned int i; - - /* - * No mutual exclusion with respect to accessors of the tree is needed - * here as the synchronization is done via the state variable - * revmap_trees_allocated. - */ - list_for_each_entry(h, &irq_hosts, link) { - if (h->revmap_type == IRQ_HOST_MAP_TREE) - INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL); - } - - /* - * Make sure the radix trees inits are visible before setting - * the flag - */ - smp_wmb(); - revmap_trees_allocated = 1; - - /* - * Insert the reverse mapping for those interrupts already present - * in irq_map[]. - */ - mutex_lock(&revmap_trees_mutex); - for (i = 0; i < irq_virq_count; i++) { - if (irq_map[i].host && - (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE)) - radix_tree_insert(&irq_map[i].host->revmap_data.tree, - irq_map[i].hwirq, &irq_map[i]); - } - mutex_unlock(&revmap_trees_mutex); - - /* - * Make sure the radix trees insertions are visible before setting - * the flag - */ - smp_wmb(); - revmap_trees_allocated = 2; - - return 0; -} -arch_initcall(irq_late_init); - #ifdef CONFIG_VIRQ_DEBUG static int virq_debug_show(struct seq_file *m, void *private) { -- cgit v1.2.2 From 2d441681a4df7822e6ef6fcc0320bb14d2a06dbb Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:29:57 +0000 Subject: powerpc: Return early if irq_host lookup type is wrong If for some reason the code incrorectly calls the wrong function to manage the revmap, not only should we warn, we should take action. However, in the paths we expect to be taken every delivered interrupt change to WARN_ON_ONCE. Use the if (WARN_ON(x)) format to get the unlikely for free. Signed-off-by: Milton Miller Reviewed-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f42e869ee3cc..4a5aa8ca97a5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -814,8 +814,7 @@ void irq_dispose_mapping(unsigned int virq) return; host = irq_map[virq].host; - WARN_ON (host == NULL); - if (host == NULL) + if (WARN_ON(host == NULL)) return; /* Never unmap legacy interrupts */ @@ -898,7 +897,8 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, struct irq_map_entry *ptr; unsigned int virq; - WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); + if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_TREE)) + return irq_find_mapping(host, hwirq); /* * No rcu_read_lock(ing) needed, the ptr returned can't go under us @@ -922,7 +922,8 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq, irq_hw_number_t hwirq) { - WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); + if (WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE)) + return; if (virq != NO_IRQ) { mutex_lock(&revmap_trees_mutex); @@ -937,7 +938,8 @@ unsigned int irq_linear_revmap(struct irq_host *host, { unsigned int *revmap; - WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR); + if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_LINEAR)) + return irq_find_mapping(host, hwirq); /* Check revmap bounds */ if (unlikely(hwirq >= host->revmap_data.linear.size)) -- cgit v1.2.2 From da0519800260a3c791b4fe3317a0c7560027a372 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:30:26 +0000 Subject: powerpc: Remove irq_host_ops->remap hook It was called from irq_create_mapping if that was called for a host and hwirq that was previously mapped, "to update the flags". But the only implementation was in beat_interrupt and all it did was repeat a hypervisor call without error checking that was performed with error checking at the beginning of the map hook. In addition, the comment on the beat remap hook says it will only called once for a given mapping, which would apply to map not remap. All flags should be known by the time the match hook is called, before we call the map hook. Removing this mostly unused hook will simpify the requirements of irq_domain concept. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4a5aa8ca97a5..0715a09a4101 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -732,8 +732,6 @@ unsigned int irq_create_mapping(struct irq_host *host, */ virq = irq_find_mapping(host, hwirq); if (virq != NO_IRQ) { - if (host->ops->remap) - host->ops->remap(host, virq, hwirq); pr_debug("irq: -> existing mapping on virq %d\n", virq); return virq; } -- cgit v1.2.2 From 3ee62d365b519c0c18c774049efcde84fe51c60c Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:30:36 +0000 Subject: powerpc: Add virq_is_host to reduce virq_to_host usage Some irq_host implementations are using virq_to_host to check if they are the irq_host for a virtual irq. To allow us to make space versus time tradeoffs, replace this usage with an assertive virq_is_host that confirms or denies the irq is associated with the given irq_host. Signed-off-by: Milton Miller Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0715a09a4101..73cf29078fef 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -510,6 +510,12 @@ irq_hw_number_t virq_to_hw(unsigned int virq) } EXPORT_SYMBOL_GPL(virq_to_hw); +bool virq_is_host(unsigned int virq, struct irq_host *host) +{ + return irq_map[virq].host == host; +} +EXPORT_SYMBOL_GPL(virq_is_host); + struct irq_host *virq_to_host(unsigned int virq) { return irq_map[virq].host; -- cgit v1.2.2 From 1e8c23013ed0d535e531b3b9cc30200e884f3ff0 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:30:40 +0000 Subject: powerpc: Remove virq_to_host The only references to the irq_map[].host field are internal to arch/powerpc/kernel/irq.c Signed-off-by: Milton Miller Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 73cf29078fef..4368b5ed5604 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -516,12 +516,6 @@ bool virq_is_host(unsigned int virq, struct irq_host *host) } EXPORT_SYMBOL_GPL(virq_is_host); -struct irq_host *virq_to_host(unsigned int virq) -{ - return irq_map[virq].host; -} -EXPORT_SYMBOL_GPL(virq_to_host); - static int default_irq_host_match(struct irq_host *h, struct device_node *np) { return h->of_node != NULL && h->of_node == np; -- cgit v1.2.2 From d36b4c4f3cc6caae6d4a12d9f995513e4c3acdd5 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 6 Apr 2011 00:18:48 -0500 Subject: powerpc/fsl-booke64: Add support for Debug Level exception handler Signed-off-by: Kumar Gala --- arch/powerpc/kernel/exceptions-64e.S | 65 ++++++++++++++++++++++++++++++++++-- arch/powerpc/kernel/setup_64.c | 8 +++++ 2 files changed, 70 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 4d0abb4930a1..cf27a8fa0d29 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -253,9 +253,6 @@ exception_marker: .balign 0x1000 .globl interrupt_base_book3e interrupt_base_book3e: /* fake trap */ - /* Note: If real debug exceptions are supported by the HW, the vector - * below will have to be patched up to point to an appropriate handler - */ EXCEPTION_STUB(0x000, machine_check) /* 0x0200 */ EXCEPTION_STUB(0x020, critical_input) /* 0x0580 */ EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */ @@ -455,6 +452,68 @@ interrupt_end_book3e: kernel_dbg_exc: b . /* NYI */ +/* Debug exception as a debug interrupt*/ + START_EXCEPTION(debug_debug); + DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS) + + /* + * If there is a single step or branch-taken exception in an + * exception entry sequence, it was probably meant to apply to + * the code where the exception occurred (since exception entry + * doesn't turn off DE automatically). We simulate the effect + * of turning off DE on entry to an exception handler by turning + * off DE in the DSRR1 value and clearing the debug status. + */ + + mfspr r14,SPRN_DBSR /* check single-step/branch taken */ + andis. r15,r14,DBSR_IC@h + beq+ 1f + + LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) + LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e) + cmpld cr0,r10,r14 + cmpld cr1,r10,r15 + blt+ cr0,1f + bge+ cr1,1f + + /* here it looks like we got an inappropriate debug exception. */ + lis r14,DBSR_IC@h /* clear the IC event */ + rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */ + mtspr SPRN_DBSR,r14 + mtspr SPRN_DSRR1,r11 + lwz r10,PACA_EXDBG+EX_CR(r13) /* restore registers */ + ld r1,PACA_EXDBG+EX_R1(r13) + ld r14,PACA_EXDBG+EX_R14(r13) + ld r15,PACA_EXDBG+EX_R15(r13) + mtcr r10 + ld r10,PACA_EXDBG+EX_R10(r13) /* restore registers */ + ld r11,PACA_EXDBG+EX_R11(r13) + mfspr r13,SPRN_SPRG_DBG_SCRATCH + rfdi + + /* Normal debug exception */ + /* XXX We only handle coming from userspace for now since we can't + * quite save properly an interrupted kernel state yet + */ +1: andi. r14,r11,MSR_PR; /* check for userspace again */ + beq kernel_dbg_exc; /* if from kernel mode */ + + /* Now we mash up things to make it look like we are coming on a + * normal exception + */ + mfspr r15,SPRN_SPRG_DBG_SCRATCH + mtspr SPRN_SPRG_GEN_SCRATCH,r15 + mfspr r14,SPRN_DBSR + EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL) + std r14,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r14 + ld r14,PACA_EXDBG+EX_R14(r13) + ld r15,PACA_EXDBG+EX_R15(r13) + bl .save_nvgprs + bl .DebugException + b .ret_from_except + /* Doorbell interrupt */ MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index c2ec0a12e14f..a88bf2713d41 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -62,6 +62,7 @@ #include #include #include +#include #include "setup.h" @@ -477,6 +478,9 @@ static void __init irqstack_early_init(void) #ifdef CONFIG_PPC_BOOK3E static void __init exc_lvl_early_init(void) { + extern unsigned int interrupt_base_book3e; + extern unsigned int exc_debug_debug_book3e; + unsigned int i; for_each_possible_cpu(i) { @@ -487,6 +491,10 @@ static void __init exc_lvl_early_init(void) mcheckirq_ctx[i] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); } + + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) + patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1, + (unsigned long)&exc_debug_debug_book3e, 0); } #else #define exc_lvl_early_init() -- cgit v1.2.2 From 3a6e9bd7f60b29efc205485ceb11a768032c40d4 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 9 May 2011 16:26:00 -0500 Subject: powerpc/e5500: set non-base IVORs Without this, we attempt to use doorbells for IPIs, and end up branching to some bad address. Plus, even for the exceptions we don't implement, it's good to handle it and get a message out. Signed-off-by: Scott Wood Signed-off-by: Kumar Gala --- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 3 ++ arch/powerpc/kernel/exceptions-64e.S | 47 +++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 913611105c1f..8053db02b85e 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -88,6 +88,9 @@ _GLOBAL(__setup_cpu_e5500) bl __e500_dcache_setup #ifdef CONFIG_PPC_BOOK3E_64 bl .__setup_base_ivors + bl .setup_perfmon_ivor + bl .setup_doorbell_ivors + bl .setup_ehv_ivors #else bl __setup_e500mc_ivors #endif diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index cf27a8fa0d29..d24d4400cc79 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -269,8 +269,13 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */ EXCEPTION_STUB(0x1c0, data_tlb_miss) EXCEPTION_STUB(0x1e0, instruction_tlb_miss) + EXCEPTION_STUB(0x260, perfmon) EXCEPTION_STUB(0x280, doorbell) EXCEPTION_STUB(0x2a0, doorbell_crit) + EXCEPTION_STUB(0x2c0, guest_doorbell) + EXCEPTION_STUB(0x2e0, guest_doorbell_crit) + EXCEPTION_STUB(0x300, hypercall) + EXCEPTION_STUB(0x320, ehpriv) .globl interrupt_end_book3e interrupt_end_book3e: @@ -514,6 +519,8 @@ kernel_dbg_exc: bl .DebugException b .ret_from_except + MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE) + /* Doorbell interrupt */ MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE) @@ -528,6 +535,11 @@ kernel_dbg_exc: // b ret_from_crit_except b . + MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE) + /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -647,7 +659,12 @@ fast_exception_return: BAD_STACK_TRAMPOLINE(0x000) BAD_STACK_TRAMPOLINE(0x100) BAD_STACK_TRAMPOLINE(0x200) +BAD_STACK_TRAMPOLINE(0x260) +BAD_STACK_TRAMPOLINE(0x2c0) +BAD_STACK_TRAMPOLINE(0x2e0) BAD_STACK_TRAMPOLINE(0x300) +BAD_STACK_TRAMPOLINE(0x310) +BAD_STACK_TRAMPOLINE(0x320) BAD_STACK_TRAMPOLINE(0x400) BAD_STACK_TRAMPOLINE(0x500) BAD_STACK_TRAMPOLINE(0x600) @@ -1183,3 +1200,33 @@ _GLOBAL(__setup_base_ivors) sync blr + +_GLOBAL(setup_perfmon_ivor) + SET_IVOR(35, 0x260) /* Performance Monitor */ + blr + +_GLOBAL(setup_doorbell_ivors) + SET_IVOR(36, 0x280) /* Processor Doorbell */ + SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */ + + /* Check MMUCFG[LPIDSIZE] to determine if we have category E.HV */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beqlr + + SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ + SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ + blr + +_GLOBAL(setup_ehv_ivors) + /* + * We may be running as a guest and lack E.HV even on a chip + * that normally has it. + */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beqlr + + SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */ + SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */ + blr -- cgit v1.2.2 From 41fb5e62604c5ddd00a784ffb7672dd8df5d76f2 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:30:44 +0000 Subject: powerpc: Make IRQ_NOREQUEST last to clear, first to set When creating an irq, don't allow a concurent driver request until we have caled map, which will likley call set_chip_and_handler to change the irq_chip and its operations. Similarly, when tearing down an IRQ, make sure no new uses come along while we change the irq back to the nop chip and then reset the descriptor to freed status. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4368b5ed5604..a24d37d4cf51 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -586,14 +586,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, irq_map[i].host = host; smp_wmb(); - /* Clear norequest flags */ - irq_clear_status_flags(i, IRQ_NOREQUEST); - /* Legacy flags are left to default at this point, * one can then use irq_create_mapping() to * explicitly change them */ ops->map(host, i, i); + + /* Clear norequest flags */ + irq_clear_status_flags(i, IRQ_NOREQUEST); } break; case IRQ_HOST_MAP_LINEAR: @@ -664,8 +664,6 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq, goto error; } - irq_clear_status_flags(virq, IRQ_NOREQUEST); - /* map it */ smp_wmb(); irq_map[virq].hwirq = hwirq; @@ -676,6 +674,8 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq, goto errdesc; } + irq_clear_status_flags(virq, IRQ_NOREQUEST); + return 0; errdesc: @@ -819,6 +819,8 @@ void irq_dispose_mapping(unsigned int virq) if (host->revmap_type == IRQ_HOST_MAP_LEGACY) return; + irq_set_status_flags(virq, IRQ_NOREQUEST); + /* remove chip and handler */ irq_set_chip_and_handler(virq, NULL, NULL); @@ -848,8 +850,6 @@ void irq_dispose_mapping(unsigned int virq) smp_mb(); irq_map[virq].hwirq = host->inval_irq; - irq_set_status_flags(virq, IRQ_NOREQUEST); - irq_free_descs(virq, 1); /* Free it */ irq_free_virt(virq, 1); -- cgit v1.2.2 From 03bf469add176afd8a1a4c493d9f4e0e520db12b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 11 May 2011 20:58:18 +0000 Subject: powerpc: Make early memory scan more resilient to out of order nodes We keep track of the size of the lowest block of memory and call setup_initial_memory_limit() only after we've parsed them all Signed-off-by: Benjamin Herrenschmidt Acked-by: Milton Miller --- arch/powerpc/kernel/prom.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 5f5e6aed2b70..5311a26dcf46 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -68,6 +68,7 @@ int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; u64 ppc64_rma_size; #endif +static phys_addr_t first_memblock_size; static int __init early_parse_mem(char *p) { @@ -505,11 +506,14 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) size = 0x80000000ul - base; } #endif - - /* First MEMBLOCK added, do some special initializations */ - if (memstart_addr == ~(phys_addr_t)0) - setup_initial_memory_limit(base, size); - memstart_addr = min((u64)memstart_addr, base); + /* Keep track of the beginning of memory -and- the size of + * the very first block in the device-tree as it represents + * the RMA on ppc64 server + */ + if (base < memstart_addr) { + memstart_addr = base; + first_memblock_size = size; + } /* Add the chunk to the MEMBLOCK list */ memblock_add(base, size); @@ -694,6 +698,7 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); + setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Save command line for /proc/cmdline and then parse parameters */ strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); -- cgit v1.2.2 From 2c78027a62ea38585da1ff944afdc6146335cb7c Mon Sep 17 00:00:00 2001 From: Gabriel Paubert Date: Fri, 13 May 2011 01:03:13 +0000 Subject: powerpc: Fix for Pegasos keyboard and mouse [See http://lists.ozlabs.org/pipermail/linuxppc-dev/2010-October/086424.html and followups. Part of the commit message is directly copied from that.] Commit 540c6c392f01887dcc96bef0a41e63e6c1334f01 tries to find i8042 IRQs in the device-tree but doesn't fall back to the old hardcoded 1 and 12 in all failure cases. Specifically, the case where the device-tree contains nothing matching pnpPNP,303 or pnpPNP,f03 doesn't seem to be handled well. It sort of falls through to the old code, but leaves the IRQs set to 0. Signed-off-by: Gabriel Paubert Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup-common.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index ef33a084fcf4..79fca2651b65 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -602,6 +602,10 @@ int check_legacy_ioport(unsigned long base_port) * name instead */ if (!np) np = of_find_node_by_name(NULL, "8042"); + if (np) { + of_i8042_kbd_irq = 1; + of_i8042_aux_irq = 12; + } break; case FDC_BASE: /* FDC1 */ np = of_find_node_by_type(NULL, "fdc"); -- cgit v1.2.2 From f38aa708776aefd9e3ba7ec1211c07efe9fa3227 Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Mon, 16 May 2011 08:58:13 +0000 Subject: powerpc: Remove last piece of GEMINI It seems that Adrian is getting old. He removed almost everything of GEMINI in commit c53653130 ("[POWERPC] Remove the broken Gemini support") except this piece. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_32.S | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 98c4b29a56f4..ba250d505e07 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -805,19 +805,6 @@ _ENTRY(copy_and_flush) blr #ifdef CONFIG_SMP -#ifdef CONFIG_GEMINI - .globl __secondary_start_gemini -__secondary_start_gemini: - mfspr r4,SPRN_HID0 - ori r4,r4,HID0_ICFI - li r3,0 - ori r3,r3,HID0_ICE - andc r4,r4,r3 - mtspr SPRN_HID0,r4 - sync - b __secondary_start -#endif /* CONFIG_GEMINI */ - .globl __secondary_start_mpc86xx __secondary_start_mpc86xx: mfspr r3, SPRN_PIR -- cgit v1.2.2 From 208b3a4c196e733b9cec006dc132cfc149b2810a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 20 May 2011 17:50:18 +1000 Subject: powerpc: Fix hard CPU IDs detection commit 9d07bc841c9779b4d7902e417f4e509996ce805d "powerpc: Properly handshake CPUs going out of boot spin loop" Would cause a miscalculation of the hard CPU ID. It removes breaking out of the loop when finding a match with a processor, thus the "i" used as an index in the intserv array is always incorrect This broke interrupt on my PowerMac laptop. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 5311a26dcf46..48aeb55faae9 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -278,6 +278,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, int i, nthreads; unsigned long len; int found = -1; + int found_thread = 0; /* We are scanning "cpu" nodes only */ if (type == NULL || strcmp(type, "cpu") != 0) @@ -301,9 +302,11 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * version 2 of the kexec param format adds the phys cpuid of * booted proc. */ - if (initial_boot_params && initial_boot_params->version >= 2) { - if (intserv[i] == initial_boot_params->boot_cpuid_phys) + if (initial_boot_params->version >= 2) { + if (intserv[i] == initial_boot_params->boot_cpuid_phys) { found = boot_cpu_count; + found_thread = i; + } } else { /* * Check if it's the boot-cpu, set it's hw index now, @@ -322,9 +325,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (found >= 0) { DBG("boot cpu: logical %d physical %d\n", found, - intserv[i]); + intserv[found_thread]); boot_cpuid = found; - set_hard_smp_processor_id(found, intserv[i]); + set_hard_smp_processor_id(found, intserv[found_thread]); /* * PAPR defines "logical" PVR values for cpus that -- cgit v1.2.2 From cce1f106c64dc1d19d5e9406320fde18dfc662df Mon Sep 17 00:00:00 2001 From: Shaohui Xie Date: Thu, 18 Nov 2010 14:57:32 +0800 Subject: powerpc/fsl_rio: move machine_check handler Add support for machine_check support into machine_check_e500 and machine_check_e500mc. Signed-off-by: Shaohui Xie Cc: Li Yang Cc: Roy Zang Cc: Alexandre Bounine Signed-off-by: Kumar Gala --- arch/powerpc/kernel/traps.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b13306b0d925..0ff4ab98d50c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -55,6 +55,7 @@ #endif #include #include +#include #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -424,6 +425,12 @@ int machine_check_e500mc(struct pt_regs *regs) unsigned long reason = mcsr; int recoverable = 1; + if (reason & MCSR_BUS_RBERR) { + recoverable = fsl_rio_mcheck_exception(regs); + if (recoverable == 1) + goto silent_out; + } + printk("Machine check in kernel mode.\n"); printk("Caused by (from MCSR=%lx): ", reason); @@ -499,6 +506,7 @@ int machine_check_e500mc(struct pt_regs *regs) reason & MCSR_MEA ? "Effective" : "Physical", addr); } +silent_out: mtspr(SPRN_MCSR, mcsr); return mfspr(SPRN_MCSR) == 0 && recoverable; } @@ -507,6 +515,11 @@ int machine_check_e500(struct pt_regs *regs) { unsigned long reason = get_mc_reason(regs); + if (reason & MCSR_BUS_RBERR) { + if (fsl_rio_mcheck_exception(regs)) + return 1; + } + printk("Machine check in kernel mode.\n"); printk("Caused by (from MCSR=%lx): ", reason); -- cgit v1.2.2 From 6de06f313a65d0ecabf055e708d082002b568866 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Fri, 20 May 2011 16:22:25 -0400 Subject: powerpc: Fix 32-bit SMP build Commit 69e3cea8d5fd526 ("powerpc/smp: Make start_secondary_resume available to all CPU variants") introduced start_secondary_resume to misc_32.S, however it uses a 64-bit instruction which is not valid on 32-bit platforms. Use 'stw' instead. Reported-by: Richard Cochran Tested-by: Richard Cochran Signed-off-by: Josh Boyer Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/misc_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 402560e957bd..998a10028608 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -700,7 +700,7 @@ _GLOBAL(start_secondary_resume) rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD li r3,0 - std r3,0(r1) /* Zero the stack frame pointer */ + stw r3,0(r1) /* Zero the stack frame pointer */ bl start_secondary b . #endif /* CONFIG_SMP */ -- cgit v1.2.2 From eab176722f4628b2d9cf76221a43dd3a0e37e632 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 27 Apr 2011 17:24:10 -0500 Subject: KVM: PPC: booke: save/restore VRSAVE (a.k.a. USPRG0) Linux doesn't use USPRG0 (now renamed VRSAVE in the architecture, even when Altivec isn't involved), but a guest might. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 23e6a93145ab..cf0d82278951 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -395,6 +395,7 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); + DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); -- cgit v1.2.2 From 354258011e8e86961f7a72ad154ca8caf0c4c6f7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 24 May 2011 23:35:55 +0200 Subject: PM / Hibernate: Remove arch_prepare_suspend() All architectures supporting hibernation define arch_prepare_suspend() as an empty function, so remove it. Signed-off-by: Rafael J. Wysocki --- arch/powerpc/kernel/swsusp.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c index 560c96119501..aa17b76dd427 100644 --- a/arch/powerpc/kernel/swsusp.c +++ b/arch/powerpc/kernel/swsusp.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include -- cgit v1.2.2 From d6bf29b44ddf3ca915f77b9383bee8b7a209f3fd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:11:48 -0700 Subject: powerpc: mmu_gather rework Fix up powerpc to the new mmu_gather stuff. PPC has an extra batching queue to RCU free the actual pagetable allocations, use the ARCH extentions for that for now. For the ppc64_tlb_batch, which tracks the vaddrs to unhash from the hardware hash-table, keep using per-cpu arrays but flush on context switch and use a TLF bit to track the lazy_mmu state. Signed-off-by: Peter Zijlstra Acked-by: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/process.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 095043d79946..91e52df3d81d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -395,6 +395,9 @@ struct task_struct *__switch_to(struct task_struct *prev, struct thread_struct *new_thread, *old_thread; unsigned long flags; struct task_struct *last; +#ifdef CONFIG_PPC_BOOK3S_64 + struct ppc64_tlb_batch *batch; +#endif #ifdef CONFIG_SMP /* avoid complexity of lazy save/restore of fpu @@ -513,7 +516,17 @@ struct task_struct *__switch_to(struct task_struct *prev, old_thread->accum_tb += (current_tb - start_tb); new_thread->start_tb = current_tb; } -#endif +#endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_PPC_BOOK3S_64 + batch = &__get_cpu_var(ppc64_tlb_batch); + if (batch->active) { + current_thread_info()->local_flags |= _TLF_LAZY_MMU; + if (batch->index) + __flush_tlb_pending(batch); + batch->active = 0; + } +#endif /* CONFIG_PPC_BOOK3S_64 */ local_irq_save(flags); @@ -528,6 +541,14 @@ struct task_struct *__switch_to(struct task_struct *prev, hard_irq_disable(); last = _switch(old_thread, new_thread); +#ifdef CONFIG_PPC_BOOK3S_64 + if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { + current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; + batch = &__get_cpu_var(ppc64_tlb_batch); + batch->active = 1; + } +#endif /* CONFIG_PPC_BOOK3S_64 */ + local_irq_restore(flags); return last; -- cgit v1.2.2 From 02424d8966d803e33cbe51469be56b5d177b4a37 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 2 Feb 2011 17:27:24 +0000 Subject: powerpc/ftrace: Implement raw syscall tracepoints on PowerPC This patch implements the raw syscall tracepoints on PowerPC and exports them for ftrace syscalls to use. To minimise reworking existing code, I slightly re-ordered the thread info flags such that the new TIF_SYSCALL_TRACEPOINT bit would still fit within the 16 bits of the andi. instruction's UI field. The instructions in question are in /arch/powerpc/kernel/entry_{32,64}.S to and the _TIF_SYSCALL_T_OR_A with the thread flags to see if system call tracing is enabled. In the case of 64bit PowerPC, arch_syscall_addr and arch_syscall_match_sym_name are overridden to allow ftrace syscalls to work given the unusual system call table structure and symbol names that start with a period. Signed-off-by: Ian Munsie Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/ftrace.c | 8 ++++++++ arch/powerpc/kernel/ptrace.c | 10 ++++++++++ 3 files changed, 19 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9aab36312572..e8b981897d44 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -109,6 +109,7 @@ obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index ce1f3e44c24f..bf99cfa6bbfe 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef CONFIG_DYNAMIC_FTRACE @@ -600,3 +601,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) +unsigned long __init arch_syscall_addr(int nr) +{ + return sys_call_table[nr*2]; +} +#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index a6ae1cfad86c..cb22024f2b42 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef CONFIG_PPC32 #include #endif @@ -40,6 +41,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + /* * The parameter save area on the stack is used to store arguments being passed * to callee function and is located at fixed offset from stack pointer. @@ -1710,6 +1714,9 @@ long do_syscall_trace_enter(struct pt_regs *regs) */ ret = -1L; + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->gpr[0]); + if (unlikely(current->audit_context)) { #ifdef CONFIG_PPC64 if (!is_32bit_task()) @@ -1738,6 +1745,9 @@ void do_syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, regs->result); + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->result); + step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); -- cgit v1.2.2 From 7ef71d753ea0286bfeb4251b9ba592716ebdd9e8 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 24 May 2011 20:34:18 +0000 Subject: powerpc/cell: Use common smp ipi actions The cell iic interrupt controller has enough software caused interrupts to use a unique interrupt for each of the 4 messages powerpc uses. This means each interrupt gets its own irq action/data combination. Use the seperate, optimized, arch common ipi action functions registered via the helper smp_request_message_ipi instead passing the message as action data to a single action that then demultipexes to the required acton via a switch statement. smp_request_message_ipi will register the action as IRQF_PER_CPU and IRQF_DISABLED, and WARN if the allocation fails for some reason, so no need to print on that failure. It will return positive if the message will not be used by the kernel, in which case we can free the virq. In addition to elimiating inefficient code, this also corrects the error that a kernel built with kexec but without a debugger would not register the ipi for kdump to notify the other cpus of a crash. This also restores the debugger action to be static to kernel/smp.c. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4a6f2ec7e761..8ebc6700b98d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -129,7 +129,7 @@ static irqreturn_t call_function_single_action(int irq, void *data) return IRQ_HANDLED; } -irqreturn_t debug_ipi_action(int irq, void *data) +static irqreturn_t debug_ipi_action(int irq, void *data) { if (crash_ipi_function_ptr) { crash_ipi_function_ptr(get_irq_regs()); -- cgit v1.2.2 From 8142f032a90680ed2fb080ca694ffe9509276361 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 24 May 2011 20:34:18 +0000 Subject: powerpc/irq: Remove stale and misleading comment The comment claims we will call host->ops->map() to update the flags if we find a previously established mapping, but we never did. We used to call remap, but that call was removed in da05198002 (powerpc: Remove irq_host_ops->remap hook). Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index a24d37d4cf51..f2fd6539d8bf 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -727,9 +727,7 @@ unsigned int irq_create_mapping(struct irq_host *host, } pr_debug("irq: -> using host @%p\n", host); - /* Check if mapping already exist, if it does, call - * host->ops->map() to update the flags - */ + /* Check if mapping already exists */ virq = irq_find_mapping(host, hwirq); if (virq != NO_IRQ) { pr_debug("irq: -> existing mapping on virq %d\n", virq); -- cgit v1.2.2 From 3d1b5e206a4f0ce46f2aa138590738c5d8e118ac Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 24 May 2011 20:34:17 +0000 Subject: powerpc/irq: Always free duplicate IRQ_LEGACY hosts Since kmem caches are allocated before init_IRQ as noted in 3af259d155 (powerpc: Radix trees are available before init_IRQ), we now call kmalloc in all cases and can can always call kfree if we are asked to allocate a duplicate or conflicting IRQ_HOST_MAP_LEGACY host. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f2fd6539d8bf..0df255414836 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -557,15 +557,8 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, if (revmap_type == IRQ_HOST_MAP_LEGACY) { if (irq_map[0].host != NULL) { raw_spin_unlock_irqrestore(&irq_big_lock, flags); - /* If we are early boot, we can't free the structure, - * too bad... - * this will be fixed once slab is made available early - * instead of the current cruft - */ - if (mem_init_done) { - of_node_put(host->of_node); - kfree(host); - } + of_node_put(host->of_node); + kfree(host); return NULL; } irq_map[0].host = host; -- cgit v1.2.2 From 2e455257d143f54b44701e947a092d513889d01c Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 24 May 2011 20:34:18 +0000 Subject: powerpc/irq: Check desc in handle_one_irq and expand generic_handle_irq Look up the descriptor and check that it is found in handle_one_irq before checking if we are on the irq stack, and call the handler directly using the descriptor if we are on the stack. We need check irq_to_desc finds the descriptor to avoid a NULL pointer dereference. It could have failed because the number from ppc_md.get_irq was above NR_IRQS, or various exceptional conditions with sparse irqs (eg race conditions while freeing an irq if its was not shutdown in the controller). fe12bc2c99 (genirq: Uninline and sanity check generic_handle_irq()) moved generic_handle_irq out of line to allow its use by interrupt controllers in modules. However, handle_one_irq is core arch code. It already knows the details of struct irq_desc and handling irqs in the nested irq case. This will avoid the extra stack frame to return the value we don't check. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0df255414836..ac4d29119f3e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -295,17 +295,20 @@ static inline void handle_one_irq(unsigned int irq) unsigned long saved_sp_limit; struct irq_desc *desc; + desc = irq_to_desc(irq); + if (!desc) + return; + /* Switch to the irq stack to handle this */ curtp = current_thread_info(); irqtp = hardirq_ctx[smp_processor_id()]; if (curtp == irqtp) { /* We're already on the irq stack, just handle it */ - generic_handle_irq(irq); + desc->handle_irq(irq, desc); return; } - desc = irq_to_desc(irq); saved_sp_limit = current->thread.ksp_limit; irqtp->task = curtp->task; -- cgit v1.2.2 From 9b7882515864117d0015a3484c0ba0eee6713de9 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 24 May 2011 20:34:18 +0000 Subject: powerpc/irq: Protect irq_radix_revmap_lookup against irq_free_virt The radix-tree code uses call_rcu when freeing internal elements. We must protect against the elements being freed while we traverse the tree, even if the returned pointer will still be valid. While preparing a patch to expand the context in which irq_radix_revmap_lookup will be called, I realized that the radix tree was not locked. When asked For a normal call_rcu usage, is it allowed to read the structure in irq_enter / irq_exit, without additional rcu_read_lock? Could an element freed with call_rcu advance with the cpu still between irq_enter/irq_exit (and irq_disabled())? Paul McKenney replied: Absolutely illegal to do so. OK for call_rcu_sched(), but a flaming bug for call_rcu(). And thank you very much for finding this!!! Further analysis: In the current CONFIG_TREE_RCU implementation. CONFIG_TREE_PREEMPT_RCU (and CONFIG_TINY_PREEMPT_RCU) uses explicit counters. These counters are reflected from per-CPU to global in the scheduling-clock-interrupt handler, so disabling irq does prevent the grace period from completing. But there are real-time implementations (such as the one use by the Concurrent guys) where disabling irq does -not- prevent the grace period from completing. While an alternative fix would be to switch radix-tree to rcu_sched, I don't want to audit the other users of radix trees (nor put alternative freeing in the library). The normal overhead for rcu_read_lock and unlock are a local counter increment and decrement. This does not show up in the rcu lockdep because in 2.6.34 commit 2676a58c98 (radix-tree: Disable RCU lockdep checking in radix tree) deemed it too hard to pass the condition of the protecting lock to the library. Signed-off-by: Milton Miller Reviewed-by: Paul E. McKenney Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ac4d29119f3e..6cb3fcd7fc37 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -893,10 +893,13 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, return irq_find_mapping(host, hwirq); /* - * No rcu_read_lock(ing) needed, the ptr returned can't go under us - * as it's referencing an entry in the static irq_map table. + * The ptr returned references the static global irq_map. + * but freeing an irq can delete nodes along the path to + * do the lookup via call_rcu. */ + rcu_read_lock(); ptr = radix_tree_lookup(&host->revmap_data.tree, hwirq); + rcu_read_unlock(); /* * If found in radix tree, then fine. -- cgit v1.2.2 From 4dd602900196bcc00505485e2a363caec4f3fd93 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 24 May 2011 20:34:18 +0000 Subject: powerpc: Fix irq_free_virt by adjusting bounds before loop Instead of looping over each irq and checking against the irq array bounds, adjust the bounds before looping. The old code will not free any irq if the irq + count is above irq_virq_count because the test in the loop is testing irq + count instead of irq + i. This code checks the limits to avoid unsigned integer overflows. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 6cb3fcd7fc37..5b428e308666 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -1007,14 +1007,23 @@ void irq_free_virt(unsigned int virq, unsigned int count) WARN_ON (virq < NUM_ISA_INTERRUPTS); WARN_ON (count == 0 || (virq + count) > irq_virq_count); + if (virq < NUM_ISA_INTERRUPTS) { + if (virq + count < NUM_ISA_INTERRUPTS) + return; + count =- NUM_ISA_INTERRUPTS - virq; + virq = NUM_ISA_INTERRUPTS; + } + + if (count > irq_virq_count || virq > irq_virq_count - count) { + if (virq > irq_virq_count) + return; + count = irq_virq_count - virq; + } + raw_spin_lock_irqsave(&irq_big_lock, flags); for (i = virq; i < (virq + count); i++) { struct irq_host *host; - if (i < NUM_ISA_INTERRUPTS || - (virq + count) > irq_virq_count) - continue; - host = irq_map[i].host; irq_map[i].hwirq = host->inval_irq; smp_wmb(); -- cgit v1.2.2 From fb9be2349f099d7c68b706e04fd62c478d3c0ed2 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 2 Jun 2011 11:26:13 -0500 Subject: powerpc/book3e: Fix CPU feature handling on e5500 in 32-bit mode We are missing FPU feature bit that user space may require. In the 64-bit mode this gets set since we pull it in via COMMON_USER_PPC64. We just explicitly set it so user space will be happy again. Signed-off-by: Kumar Gala --- arch/powerpc/kernel/cputable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 34d2722b9451..9fb933248ab6 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1979,7 +1979,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_value = 0x80240000, .cpu_name = "e5500", .cpu_features = CPU_FTRS_E5500, - .cpu_user_features = COMMON_USER_BOOKE, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX, .icache_bsize = 64, -- cgit v1.2.2 From 307cfe715344e15eda12dad3bb14f794115ca823 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 9 Jun 2011 16:52:38 +1000 Subject: powerpc: Force page alignment for initrd reserved memory When using 64K pages with a separate cpio rootfs, U-Boot will align the rootfs on a 4K page boundary. When the memory is reserved, and subsequent early memblock_alloc is called, it will allocate memory between the 64K page alignment and reserved memory. When the reserved memory is subsequently freed, it is done so by pages, causing the early memblock_alloc requests to be re-used, which in my case, caused the device-tree to be clobbered. This patch forces the reserved memory for initrd to be kernel page aligned, and will move the device tree if it overlaps with the range extension of initrd. This patch will also consolidate the identical function free_initrd_mem() from mm/init_32.c, init_64.c to mm/mem.c, and adds the same range extension when freeing initrd. free_initrd_mem() is also moved to the __init section. Many thanks to Milton Miller for his input on this patch. [BenH: Fixed build without CONFIG_BLK_DEV_INITRD] Signed-off-by: Dave Carroll Cc: Benjamin Herrenschmidt Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f2c906b1d8d3..8c3112a57cf2 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -82,11 +82,29 @@ static int __init early_parse_mem(char *p) } early_param("mem", early_parse_mem); +/* + * overlaps_initrd - check for overlap with page aligned extension of + * initrd. + */ +static inline int overlaps_initrd(unsigned long start, unsigned long size) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (!initrd_start) + return 0; + + return (start + size) > _ALIGN_DOWN(initrd_start, PAGE_SIZE) && + start <= _ALIGN_UP(initrd_end, PAGE_SIZE); +#else + return 0; +#endif +} + /** * move_device_tree - move tree to an unused area, if needed. * * The device tree may be allocated beyond our memory limit, or inside the - * crash kernel region for kdump. If so, move it out of the way. + * crash kernel region for kdump, or within the page aligned range of initrd. + * If so, move it out of the way. */ static void __init move_device_tree(void) { @@ -99,7 +117,8 @@ static void __init move_device_tree(void) size = be32_to_cpu(initial_boot_params->totalsize); if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) || - overlaps_crashkernel(start, size)) { + overlaps_crashkernel(start, size) || + overlaps_initrd(start, size)) { p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); initial_boot_params = (struct boot_param_header *)p; @@ -555,7 +574,9 @@ static void __init early_reserve_mem(void) #ifdef CONFIG_BLK_DEV_INITRD /* then reserve the initrd, if any */ if (initrd_start && (initrd_end > initrd_start)) - memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); + memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), + _ALIGN_UP(initrd_end, PAGE_SIZE) - + _ALIGN_DOWN(initrd_start, PAGE_SIZE)); #endif /* CONFIG_BLK_DEV_INITRD */ #ifdef CONFIG_PPC32 -- cgit v1.2.2 From 82a9a4809f4cb4ce3f17da99a8150df8455fa096 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 16 Jun 2011 14:09:17 -0500 Subject: powerpc/e500: fix breakage with fsl_rio_mcheck_exception The wrong MCSR bit was being used on e500mc. MCSR_BUS_RBERR only exists on e500v1/v2. Use MCSR_LD on e500mc, and remove all MCSR checking in fsl_rio_mcheck_exception as we now no longer call that function if the appropriate bit in MCSR is not set. If RIO support was enabled at compile-time, but was never probed, just return from fsl_rio_mcheck_exception rather than dereference a NULL pointer. TODO: There is still a remaining, though comparitively minor, issue in that this recovery mechanism will falsely engage if there's an unrelated MCSR_LD event at the same time as a RIO error. Signed-off-by: Scott Wood Signed-off-by: Kumar Gala --- arch/powerpc/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0ff4ab98d50c..6414a0d5ba65 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -425,7 +425,7 @@ int machine_check_e500mc(struct pt_regs *regs) unsigned long reason = mcsr; int recoverable = 1; - if (reason & MCSR_BUS_RBERR) { + if (reason & MCSR_LD) { recoverable = fsl_rio_mcheck_exception(regs); if (recoverable == 1) goto silent_out; -- cgit v1.2.2 From 9a8f99fab02db296815d7f0ae8ba8ce169df0063 Mon Sep 17 00:00:00 2001 From: Christian Dietrich Date: Sat, 4 Jun 2011 05:35:47 +0000 Subject: powerpc/rtas-rtc: remove sideeffects of printk_ratelimit Don't use printk_ratelimit() as an additional condition for returning on an error. Because when the ratelimit is reached, printk_ratelimit will return 0 and e.g. in rtas_get_boot_time won't check for an error condition. Signed-off-by: Christian Dietrich Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtas-rtc.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c index 77578c093dda..c57c19358a26 100644 --- a/arch/powerpc/kernel/rtas-rtc.c +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -29,9 +30,10 @@ unsigned long __init rtas_get_boot_time(void) } } while (wait_time && (get_tb() < max_wait_tb)); - if (error != 0 && printk_ratelimit()) { - printk(KERN_WARNING "error: reading the clock failed (%d)\n", - error); + if (error != 0) { + printk_ratelimited(KERN_WARNING + "error: reading the clock failed (%d)\n", + error); return 0; } @@ -55,19 +57,21 @@ void rtas_get_rtc_time(struct rtc_time *rtc_tm) wait_time = rtas_busy_delay_time(error); if (wait_time) { - if (in_interrupt() && printk_ratelimit()) { + if (in_interrupt()) { memset(rtc_tm, 0, sizeof(struct rtc_time)); - printk(KERN_WARNING "error: reading clock" - " would delay interrupt\n"); + printk_ratelimited(KERN_WARNING + "error: reading clock " + "would delay interrupt\n"); return; /* delay not allowed */ } msleep(wait_time); } } while (wait_time && (get_tb() < max_wait_tb)); - if (error != 0 && printk_ratelimit()) { - printk(KERN_WARNING "error: reading the clock failed (%d)\n", - error); + if (error != 0) { + printk_ratelimited(KERN_WARNING + "error: reading the clock failed (%d)\n", + error); return; } @@ -99,9 +103,10 @@ int rtas_set_rtc_time(struct rtc_time *tm) } } while (wait_time && (get_tb() < max_wait_tb)); - if (error != 0 && printk_ratelimit()) - printk(KERN_WARNING "error: setting the clock failed (%d)\n", - error); + if (error != 0) + printk_ratelimited(KERN_WARNING + "error: setting the clock failed (%d)\n", + error); return 0; } -- cgit v1.2.2 From 76462232c21dc011462522387ddad0598a4f11e4 Mon Sep 17 00:00:00 2001 From: Christian Dietrich Date: Sat, 4 Jun 2011 05:36:54 +0000 Subject: arch/powerpc: use printk_ratelimited instead of printk_ratelimit Since printk_ratelimit() shouldn't be used anymore (see comment in include/linux/printk.h), replace it with printk_ratelimited. Signed-off-by: Christian Dietrich Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/signal_32.c | 57 ++++++++++++++++++++++------------------- arch/powerpc/kernel/signal_64.c | 17 ++++++------ arch/powerpc/kernel/traps.c | 22 ++++++++-------- 3 files changed, 50 insertions(+), 46 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index b96a3a010c26..78b76dc54dfb 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -25,6 +25,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 #include #include @@ -892,11 +893,12 @@ badframe: printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n", regs, frame, newsp); #endif - if (show_unhandled_signals && printk_ratelimit()) - printk(KERN_INFO "%s[%d]: bad frame in handle_rt_signal32: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - addr, regs->nip, regs->link); + if (show_unhandled_signals) + printk_ratelimited(KERN_INFO + "%s[%d]: bad frame in handle_rt_signal32: " + "%p nip %08lx lr %08lx\n", + current->comm, current->pid, + addr, regs->nip, regs->link); force_sigsegv(sig, current); return 0; @@ -1058,11 +1060,12 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, return 0; bad: - if (show_unhandled_signals && printk_ratelimit()) - printk(KERN_INFO "%s[%d]: bad frame in sys_rt_sigreturn: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - rt_sf, regs->nip, regs->link); + if (show_unhandled_signals) + printk_ratelimited(KERN_INFO + "%s[%d]: bad frame in sys_rt_sigreturn: " + "%p nip %08lx lr %08lx\n", + current->comm, current->pid, + rt_sf, regs->nip, regs->link); force_sig(SIGSEGV, current); return 0; @@ -1149,12 +1152,12 @@ int sys_debug_setcontext(struct ucontext __user *ctx, * We kill the task with a SIGSEGV in this situation. */ if (do_setcontext(ctx, regs, 1)) { - if (show_unhandled_signals && printk_ratelimit()) - printk(KERN_INFO "%s[%d]: bad frame in " - "sys_debug_setcontext: %p nip %08lx " - "lr %08lx\n", - current->comm, current->pid, - ctx, regs->nip, regs->link); + if (show_unhandled_signals) + printk_ratelimited(KERN_INFO "%s[%d]: bad frame in " + "sys_debug_setcontext: %p nip %08lx " + "lr %08lx\n", + current->comm, current->pid, + ctx, regs->nip, regs->link); force_sig(SIGSEGV, current); goto out; @@ -1236,11 +1239,12 @@ badframe: printk("badframe in handle_signal, regs=%p frame=%p newsp=%lx\n", regs, frame, newsp); #endif - if (show_unhandled_signals && printk_ratelimit()) - printk(KERN_INFO "%s[%d]: bad frame in handle_signal32: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - frame, regs->nip, regs->link); + if (show_unhandled_signals) + printk_ratelimited(KERN_INFO + "%s[%d]: bad frame in handle_signal32: " + "%p nip %08lx lr %08lx\n", + current->comm, current->pid, + frame, regs->nip, regs->link); force_sigsegv(sig, current); return 0; @@ -1288,11 +1292,12 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, return 0; badframe: - if (show_unhandled_signals && printk_ratelimit()) - printk(KERN_INFO "%s[%d]: bad frame in sys_sigreturn: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - addr, regs->nip, regs->link); + if (show_unhandled_signals) + printk_ratelimited(KERN_INFO + "%s[%d]: bad frame in sys_sigreturn: " + "%p nip %08lx lr %08lx\n", + current->comm, current->pid, + addr, regs->nip, regs->link); force_sig(SIGSEGV, current); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index da989fff19cc..e91c736cc842 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -380,10 +381,10 @@ badframe: printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n", regs, uc, &uc->uc_mcontext); #endif - if (show_unhandled_signals && printk_ratelimit()) - printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, "rt_sigreturn", - (long)uc, regs->nip, regs->link); + if (show_unhandled_signals) + printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, + current->comm, current->pid, "rt_sigreturn", + (long)uc, regs->nip, regs->link); force_sig(SIGSEGV, current); return 0; @@ -468,10 +469,10 @@ badframe: printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n", regs, frame, newsp); #endif - if (show_unhandled_signals && printk_ratelimit()) - printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, "setup_rt_frame", - (long)frame, regs->nip, regs->link); + if (show_unhandled_signals) + printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, + current->comm, current->pid, "setup_rt_frame", + (long)frame, regs->nip, regs->link); force_sigsegv(signr, current); return 0; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6414a0d5ba65..1a0141426cda 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -197,12 +198,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) if (die("Exception in kernel mode", regs, signr)) return; } else if (show_unhandled_signals && - unhandled_signal(current, signr) && - printk_ratelimit()) { - printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, signr, - addr, regs->nip, regs->link, code); - } + unhandled_signal(current, signr)) { + printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, + current->comm, current->pid, signr, + addr, regs->nip, regs->link, code); + } memset(&info, 0, sizeof(info)); info.si_signo = signr; @@ -1342,9 +1342,8 @@ void altivec_assist_exception(struct pt_regs *regs) } else { /* didn't recognize the instruction */ /* XXX quick hack for now: set the non-Java bit in the VSCR */ - if (printk_ratelimit()) - printk(KERN_ERR "Unrecognized altivec instruction " - "in %s at %lx\n", current->comm, regs->nip); + printk_ratelimited(KERN_ERR "Unrecognized altivec instruction " + "in %s at %lx\n", current->comm, regs->nip); current->thread.vscr.u[3] |= 0x10000; } } @@ -1548,9 +1547,8 @@ u32 ppc_warn_emulated; void ppc_warn_emulated_print(const char *type) { - if (printk_ratelimit()) - pr_warning("%s used emulated %s instruction\n", current->comm, - type); + pr_warn_ratelimited("%s used emulated %s instruction\n", current->comm, + type); } static int __init ppc_warn_emulated_init(void) -- cgit v1.2.2