From e63075a3c9377536d085bc013cd3fe6323162449 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:01 -0700 Subject: memblock: Introduce default allocation limit and use it to replace explicit ones This introduce memblock.current_limit which is used to limit allocations from memblock_alloc() or memblock_alloc_base(..., MEMBLOCK_ALLOC_ACCESSIBLE). The old MEMBLOCK_ALLOC_ANYWHERE changes value from 0 to ~(u64)0 and can still be used with memblock_alloc_base() to allocate really anywhere. It is -no-longer- cropped to MEMBLOCK_REAL_LIMIT which disappears. Note to archs: I'm leaving the default limit to MEMBLOCK_ALLOC_ANYWHERE. I strongly recommend that you ensure that you set an appropriate limit during boot in order to guarantee that an memblock_alloc() at any time results in something that is accessible with a simple __va(). The reason is that a subsequent patch will introduce the ability for the array to resize itself by reallocating itself. The MEMBLOCK core will honor the current limit when performing those allocations. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/memblock.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h index 3c29728b56b1..43efc345065e 100644 --- a/arch/powerpc/include/asm/memblock.h +++ b/arch/powerpc/include/asm/memblock.h @@ -5,11 +5,4 @@ #define MEMBLOCK_DBG(fmt...) udbg_printf(fmt) -#ifdef CONFIG_PPC32 -extern phys_addr_t lowmem_end_addr; -#define MEMBLOCK_REAL_LIMIT lowmem_end_addr -#else -#define MEMBLOCK_REAL_LIMIT 0 -#endif - #endif /* _ASM_POWERPC_MEMBLOCK_H */ -- cgit v1.2.2 From cd3db0c4ca3d237e7ad20f7107216e575705d2b0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:02 -0700 Subject: memblock: Remove rmo_size, burry it in arch/powerpc where it belongs The RMA (RMO is a misnomer) is a concept specific to ppc64 (in fact server ppc64 though I hijack it on embedded ppc64 for similar purposes) and represents the area of memory that can be accessed in real mode (aka with MMU off), or on embedded, from the exception vectors (which is bolted in the TLB) which pretty much boils down to the same thing. We take that out of the generic MEMBLOCK data structure and move it into arch/powerpc where it belongs, renaming it to "RMA" while at it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mmu.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 7ebf42ed84a2..bb40a06d3b77 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -2,6 +2,8 @@ #define _ASM_POWERPC_MMU_H_ #ifdef __KERNEL__ +#include + #include #include @@ -82,6 +84,16 @@ extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup; extern void early_init_mmu(void); extern void early_init_mmu_secondary(void); +extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size); + +#ifdef CONFIG_PPC64 +/* This is our real memory area size on ppc64 server, on embedded, we + * make it match the size our of bolted TLB area + */ +extern u64 ppc64_rma_size; +#endif /* CONFIG_PPC64 */ + #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. -- cgit v1.2.2 From fdd374b62ca4df144c0138359dcffa83df7a0ea8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 2 Aug 2010 20:09:52 +0000 Subject: powerpc: Optimise 64bit csum_partial_copy_generic and add csum_and_copy_from_user We use the same core loop as the new csum_partial, adding in the stores and exception handling code. To keep things simple we do all the exception fixup in csum_and_copy_from_user. This wrapper function is modelled on the generic checksum code and is careful to always calculate a complete checksum even if we only copied part of the data to userspace. To test this I forced checksumming on over loopback and ran socklib (a simple TCP benchmark). On a POWER6 575 throughput improved by 19% with this patch. If I forced both the sender and receiver onto the same cpu (with the hope of shifting the benchmark from being cache bandwidth limited to cpu limited), adding this patch improved performance by 55% Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/checksum.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 7cdf358337cf..9ea58c0e7cfb 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -52,12 +52,19 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum); extern __wsum csum_partial_copy_generic(const void *src, void *dst, int len, __wsum sum, int *src_err, int *dst_err); + +#ifdef __powerpc64__ +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER +extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr); +#else /* * the same as csum_partial, but copies from src to dst while it * checksums. */ #define csum_partial_copy_from_user(src, dst, len, sum, errp) \ csum_partial_copy_generic((__force const void *)(src), (dst), (len), (sum), (errp), NULL) +#endif #define csum_partial_copy_nocheck(src, dst, len, sum) \ csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL) -- cgit v1.2.2 From 8c77391475bc3284a380fc46aaf0bcf26bde3ae6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 2 Aug 2010 20:11:36 +0000 Subject: powerpc: Add 64bit csum_and_copy_to_user This adds the equivalent of csum_and_copy_from_user for the receive side so we can copy and checksum in one pass. It is modelled on the generic checksum routine. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/checksum.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 9ea58c0e7cfb..ce0c28495f9a 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -57,6 +57,9 @@ extern __wsum csum_partial_copy_generic(const void *src, void *dst, #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr); +#define HAVE_CSUM_COPY_USER +extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, + int len, __wsum sum, int *err_ptr); #else /* * the same as csum_partial, but copies from src to dst while it -- cgit v1.2.2 From f89451fbd2b9f28f5ff156154989599ec062354b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 11 Aug 2010 01:40:27 +0000 Subject: powerpc: Feature nop out reservation clear when stcx checks address The POWER architecture does not require stcx to check that it is operating on the same address as the larx. This means it is possible for an an exception handler to execute a larx, get a reservation, decide not to do the stcx and then return back with an active reservation. If the interrupted code was in the middle of a larx/stcx sequence the stcx could incorrectly succeed. All recent POWER CPUs check the address before letting the stcx succeed so we can create a CPU feature and nop it out. As Ben suggested, we can only do this in our syscall path because there is a remote possibility some kernel code gets interrupted by an exception that ends up operating on the same cacheline. Thanks to Paul Mackerras and Derek Williams for the idea. To test this I used a very simple null syscall (actually getppid) testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c I tested against 2.6.35-git10 with the following changes against the pseries_defconfig: CONFIG_VIRT_CPU_ACCOUNTING=n CONFIG_AUDIT=n CONFIG_PPC_4K_PAGES=n CONFIG_PPC_64K_PAGES=y CONFIG_FORCE_MAX_ZONEORDER=9 CONFIG_PPC_SUBPAGE_PROT=n CONFIG_FUNCTION_TRACER=n CONFIG_FUNCTION_GRAPH_TRACER=n CONFIG_IRQSOFF_TRACER=n CONFIG_STACK_TRACER=n to remove the overhead of virtual CPU accounting, syscall auditing and the ftrace mcount tracers. 64kB pages were enabled to minimise TLB misses. POWER6: +8.2% POWER7: +7.0% Another suggestion was to use a larx to something in the L1 instead of a stcx. This was almost as fast as removing the larx on POWER6, but only 3.5% faster on POWER7. We can use this to speed up the reservation clear in our exception exit code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/cputable.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 3a40a992e594..f3a1fdd9cf08 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -198,6 +198,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000) #define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000) #define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000) +#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000) #ifndef __ASSEMBLY__ @@ -392,28 +393,31 @@ extern const char *powerpc_base_platform; CPU_FTR_MMCRA | CPU_FTR_CTRL) #define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ - CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ) + CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \ - CPU_FTR_CP_USE_DCBTZ) + CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ - CPU_FTR_PURR) + CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD) + CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT) + CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ -- cgit v1.2.2 From 8154c5d22d91cd16bd9985b0638c8957e4688d0e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 12 Aug 2010 20:18:15 +0000 Subject: powerpc: Abstract indexing of lppaca structs Currently we have the lppaca structs as a simple array of NR_CPUS entries, taking up space in the data section of the kernel image. In future we would like to allocate them dynamically, so this abstracts out the accesses to the array, making it easier to change how we locate the lppaca for a given cpu in future. Specifically, lppaca[cpu] changes to lppaca_of(cpu). Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/lppaca.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 14b592dfb4e8..6b73554433a0 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -153,6 +153,8 @@ struct lppaca { extern struct lppaca lppaca[]; +#define lppaca_of(cpu) (lppaca[cpu]) + /* * SLB shadow buffer structure as defined in the PAPR. The save_area * contains adjacent ESID and VSID pairs for each shadowed SLB. The -- cgit v1.2.2 From 93c22703efa72c7527dbd586d1951c1f4a85fd70 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 12 Aug 2010 20:18:48 +0000 Subject: powerpc: Dynamically allocate most lppaca structs This arranges for the lppaca structs for most cpus to be dynamically allocated in the same manner as the paca structs. If we don't include support for legacy iSeries, only the first lppaca is statically allocated; the rest are dynamically allocated. If we include legacy iSeries support, then we statically allocate the first 64 lppaca structs, since the iSeries hypervisor requires that the lppaca structs be present in the data section of the kernel image, but legacy iSeries supports at most 64 cpus. With CONFIG_NR_CPUS, the kernel image size for a typical pSeries config went from: text data bss dec hex filename 9524478 4734564 8469944 22728986 15ad11a ../test-1024/vmlinux to: text data bss dec hex filename 9524482 3751508 8469944 21745934 14bd10e ../test-1024/vmlinux a reduction of 983052 bytes overall. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/lppaca.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 6b73554433a0..6d02624b622c 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -153,7 +153,7 @@ struct lppaca { extern struct lppaca lppaca[]; -#define lppaca_of(cpu) (lppaca[cpu]) +#define lppaca_of(cpu) (*paca[cpu].lppaca_ptr) /* * SLB shadow buffer structure as defined in the PAPR. The save_area -- cgit v1.2.2 From cf9efce0ce3136fa076f53e53154e98455229514 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 26 Aug 2010 19:56:43 +0000 Subject: powerpc: Account time using timebase rather than PURR Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the PURR register for measuring the user and system time used by processes, as well as other related times such as hardirq and softirq times. This turns out to be quite confusing for users because it means that a program will often be measured as taking less time when run on a multi-threaded processor (SMT2 or SMT4 mode) than it does when run on a single-threaded processor (ST mode), even though the program takes longer to finish. The discrepancy is accounted for as stolen time, which is also confusing, particularly when there are no other partitions running. This changes the accounting to use the timebase instead, meaning that the reported user and system times are the actual number of real-time seconds that the program was executing on the processor thread, regardless of which SMT mode the processor is in. Thus a program will generally show greater user and system times when run on a multi-threaded processor than on a single-threaded processor. On pSeries systems on POWER5 or later processors, we measure the stolen time (time when this partition wasn't running) using the hypervisor dispatch trace log. We check for new entries in the log on every entry from user mode and on every transition from kernel process context to soft or hard IRQ context (i.e. when account_system_vtime() gets called). So that we can correctly distinguish time stolen from user time and time stolen from system time, without having to check the log on every exit to user mode, we store separate timestamps for exit to user mode and entry from user mode. On systems that have a SPURR (POWER6 and POWER7), we read the SPURR in account_system_vtime() (as before), and then apportion the SPURR ticks since the last time we read it between scaled user time and scaled system time according to the relative proportions of user time and system time over the same interval. This avoids having to read the SPURR on every kernel entry and exit. On systems that have PURR but not SPURR (i.e., POWER5), we do the same using the PURR rather than the SPURR. This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl for now since it conflicts with the use of the dispatch trace log by the time accounting code. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/exception-64s.h | 3 +- arch/powerpc/include/asm/lppaca.h | 19 ++++++++++++ arch/powerpc/include/asm/paca.h | 10 ++++++- arch/powerpc/include/asm/ppc_asm.h | 50 ++++++++++++++++++++------------ arch/powerpc/include/asm/time.h | 5 ---- 5 files changed, 61 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 57c400071995..7778d6f0c878 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -137,7 +137,8 @@ li r10,0; \ ld r11,exception_marker@toc(r2); \ std r10,RESULT(r1); /* clear regs->result */ \ - std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ + std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ + ACCOUNT_STOLEN_TIME /* * Exception vectors. diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 6d02624b622c..cfb85ec85750 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -172,6 +172,25 @@ struct slb_shadow { extern struct slb_shadow slb_shadow[]; +/* + * Layout of entries in the hypervisor's dispatch trace log buffer. + */ +struct dtl_entry { + u8 dispatch_reason; + u8 preempt_reason; + u16 processor_id; + u32 enqueue_to_dispatch_time; + u32 ready_to_enqueue_time; + u32 waiting_to_ready_time; + u64 timebase; + u64 fault_addr; + u64 srr0; + u64 srr1; +}; + +#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */ +#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) + #endif /* CONFIG_PPC_BOOK3S */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_LPPACA_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1ff6662f7faf..6af6c1613409 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -85,6 +85,8 @@ struct paca_struct { u8 kexec_state; /* set when kexec down has irqs off */ #ifdef CONFIG_PPC_STD_MMU_64 struct slb_shadow *slb_shadow_ptr; + struct dtl_entry *dispatch_log; + struct dtl_entry *dispatch_log_end; /* * Now, starting in cacheline 2, the exception save areas @@ -134,8 +136,14 @@ struct paca_struct { /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ u64 system_time; /* accumulated system TB ticks */ - u64 startpurr; /* PURR/TB value snapshot */ + u64 user_time_scaled; /* accumulated usermode SPURR ticks */ + u64 starttime; /* TB value snapshot */ + u64 starttime_user; /* TB value on exit to usermode */ u64 startspurr; /* SPURR value snapshot */ + u64 utime_sspurr; /* ->user_time when ->startspurr set */ + u64 stolen_time; /* TB ticks taken by hypervisor */ + u64 dtl_ridx; /* read index in dispatch log */ + struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ #ifdef CONFIG_KVM_BOOK3S_HANDLER /* We use this to store guest state in */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 498fe09263d3..98210067c1cc 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -9,6 +9,7 @@ #include #include #include +#include #ifndef __ASSEMBLY__ #error __FILE__ should only be used in assembler files @@ -26,17 +27,13 @@ #ifndef CONFIG_VIRT_CPU_ACCOUNTING #define ACCOUNT_CPU_USER_ENTRY(ra, rb) #define ACCOUNT_CPU_USER_EXIT(ra, rb) +#define ACCOUNT_STOLEN_TIME #else #define ACCOUNT_CPU_USER_ENTRY(ra, rb) \ beq 2f; /* if from kernel mode */ \ -BEGIN_FTR_SECTION; \ - mfspr ra,SPRN_PURR; /* get processor util. reg */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ -BEGIN_FTR_SECTION; \ - MFTB(ra); /* or get TB if no PURR */ \ -END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ - ld rb,PACA_STARTPURR(r13); \ - std ra,PACA_STARTPURR(r13); \ + MFTB(ra); /* get timebase */ \ + ld rb,PACA_STARTTIME_USER(r13); \ + std ra,PACA_STARTTIME(r13); \ subf rb,rb,ra; /* subtract start value */ \ ld ra,PACA_USER_TIME(r13); \ add ra,ra,rb; /* add on to user time */ \ @@ -44,19 +41,34 @@ END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ 2: #define ACCOUNT_CPU_USER_EXIT(ra, rb) \ -BEGIN_FTR_SECTION; \ - mfspr ra,SPRN_PURR; /* get processor util. reg */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ -BEGIN_FTR_SECTION; \ - MFTB(ra); /* or get TB if no PURR */ \ -END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ - ld rb,PACA_STARTPURR(r13); \ - std ra,PACA_STARTPURR(r13); \ + MFTB(ra); /* get timebase */ \ + ld rb,PACA_STARTTIME(r13); \ + std ra,PACA_STARTTIME_USER(r13); \ subf rb,rb,ra; /* subtract start value */ \ ld ra,PACA_SYSTEM_TIME(r13); \ - add ra,ra,rb; /* add on to user time */ \ - std ra,PACA_SYSTEM_TIME(r13); -#endif + add ra,ra,rb; /* add on to system time */ \ + std ra,PACA_SYSTEM_TIME(r13) + +#ifdef CONFIG_PPC_SPLPAR +#define ACCOUNT_STOLEN_TIME \ +BEGIN_FW_FTR_SECTION; \ + beq 33f; \ + /* from user - see if there are any DTL entries to process */ \ + ld r10,PACALPPACAPTR(r13); /* get ptr to VPA */ \ + ld r11,PACA_DTL_RIDX(r13); /* get log read index */ \ + ld r10,LPPACA_DTLIDX(r10); /* get log write index */ \ + cmpd cr1,r11,r10; \ + beq+ cr1,33f; \ + bl .accumulate_stolen_time; \ +33: \ +END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) + +#else /* CONFIG_PPC_SPLPAR */ +#define ACCOUNT_STOLEN_TIME + +#endif /* CONFIG_PPC_SPLPAR */ + +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ /* * Macros for storing registers into and loading registers from diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index dc779dfcf258..fe6f7c2c9c68 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -34,7 +34,6 @@ extern void to_tm(int tim, struct rtc_time * tm); extern void GregorianDay(struct rtc_time *tm); extern void generic_calibrate_decr(void); -extern void snapshot_timebase(void); extern void set_dec_cpu6(unsigned int val); @@ -212,12 +211,8 @@ struct cpu_usage { DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); #if defined(CONFIG_VIRT_CPU_ACCOUNTING) -extern void calculate_steal_time(void); -extern void snapshot_timebases(void); #define account_process_vtime(tsk) account_process_tick(tsk, 0) #else -#define calculate_steal_time() do { } while (0) -#define snapshot_timebases() do { } while (0) #define account_process_vtime(tsk) do { } while (0) #endif -- cgit v1.2.2 From 872e439a45ed4a4bd499bc55cb0dffa74027f749 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 31 Aug 2010 01:59:53 +0000 Subject: powerpc/pseries: Re-enable dispatch trace log userspace interface Since the cpu accounting code uses the hypervisor dispatch trace log now when CONFIG_VIRT_CPU_ACCOUNTING = y, the previous commit disabled access to it via files in the /sys/kernel/debug/powerpc/dtl/ directory in that case. This restores those files. To do this, we now have a hook that the cpu accounting code will call as it processes each entry from the hypervisor dispatch trace log. The code in dtl.c now uses that to fill up its ring buffer, rather than having the hypervisor fill the ring buffer directly. This also fixes dtl_file_read() to handle overflow conditions a bit better and adds a spinlock to ensure that race conditions (multiple processes opening or reading the file concurrently) are handled correctly. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/lppaca.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index cfb85ec85750..7f5e0fefebb0 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -191,6 +191,14 @@ struct dtl_entry { #define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */ #define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) +/* + * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls + * reading from the dispatch trace log. If other code wants to consume + * DTL entries, it can set this pointer to a function that will get + * called once for each DTL entry that gets processed. + */ +extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index); + #endif /* CONFIG_PPC_BOOK3S */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_LPPACA_H */ -- cgit v1.2.2 From 05d77ac90c0d260ae18decd70507dc4f5b71a2cb Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sat, 21 Aug 2010 11:43:20 +0000 Subject: powerpc: Remove fpscr use from [kvm_]cvt_{fd,df} Neither lfs nor stfs touch the fpscr, so remove the restore/save of it around them. Signed-off-by: Andreas Schwab Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/kvm_fpu.h | 4 ++-- arch/powerpc/include/asm/system.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h index c3d4f0518a67..92daae132492 100644 --- a/arch/powerpc/include/asm/kvm_fpu.h +++ b/arch/powerpc/include/asm/kvm_fpu.h @@ -82,7 +82,7 @@ FPD_THREE_IN(fmadd) FPD_THREE_IN(fnmsub) FPD_THREE_IN(fnmadd) -extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr); -extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr); +extern void kvm_cvt_fd(u32 *from, u64 *to); +extern void kvm_cvt_df(u64 *from, u32 *to); #endif diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index 6c294acac848..0b3fe78be71b 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -154,8 +154,8 @@ extern void enable_kernel_spe(void); extern void giveup_spe(struct task_struct *); extern void load_up_spe(struct task_struct *); extern int fix_alignment(struct pt_regs *); -extern void cvt_fd(float *from, double *to, struct thread_struct *thread); -extern void cvt_df(double *from, float *to, struct thread_struct *thread); +extern void cvt_fd(float *from, double *to); +extern void cvt_df(double *from, float *to); #ifndef CONFIG_SMP extern void discard_lazy_cpu_state(void); -- cgit v1.2.2 From cab175f9fa2973f0deb1580fca3c966fe1d3981e Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Fri, 27 Aug 2010 03:49:11 +0000 Subject: powerpc: Use is_32bit_task() helper to test 32-bit binary This patch removes all explicit tests for the TIF_32BIT flag Signed-off-by: Denis Kirjanov Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/compat.h | 4 ++-- arch/powerpc/include/asm/elf.h | 2 +- arch/powerpc/include/asm/page_64.h | 4 ++-- arch/powerpc/include/asm/processor.h | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 396d21a80058..3369e2c83609 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -143,7 +143,7 @@ static inline void __user *compat_alloc_user_space(long len) * We cant access below the stack pointer in the 32bit ABI and * can access 288 bytes in the 64bit ABI */ - if (!(test_thread_flag(TIF_32BIT))) + if (!is_32bit_task()) usp -= 288; return (void __user *) (usp - len); @@ -213,7 +213,7 @@ struct compat_shmid64_ds { static inline int is_compat_task(void) { - return test_thread_flag(TIF_32BIT); + return is_32bit_task(); } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index c376eda15313..2b917c69ed15 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -250,7 +250,7 @@ do { \ * the 64bit ABI has never had these issues dont enable the workaround * even if we have an executable stack. */ -# define elf_read_implies_exec(ex, exec_stk) (test_thread_flag(TIF_32BIT) ? \ +# define elf_read_implies_exec(ex, exec_stk) (is_32bit_task() ? \ (exec_stk == EXSTACK_DEFAULT) : 0) #else # define SET_PERSONALITY(ex) \ diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 358ff14ea25e..932f88dcf6fa 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -163,7 +163,7 @@ do { \ #endif /* !CONFIG_HUGETLB_PAGE */ #define VM_DATA_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ + (is_32bit_task() ? \ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) /* @@ -179,7 +179,7 @@ do { \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define VM_STACK_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ + (is_32bit_task() ? \ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) #include diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 19c05b0f74be..4c14187ba02d 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -118,7 +118,7 @@ extern struct task_struct *last_task_used_spe; #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) #define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4)) -#define TASK_UNMAPPED_BASE ((test_thread_flag(TIF_32BIT)) ? \ +#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \ TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 ) #endif @@ -128,7 +128,7 @@ extern struct task_struct *last_task_used_spe; #define STACK_TOP_USER64 TASK_SIZE_USER64 #define STACK_TOP_USER32 TASK_SIZE_USER32 -#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ +#define STACK_TOP (is_32bit_task() ? \ STACK_TOP_USER32 : STACK_TOP_USER64) #define STACK_TOP_MAX STACK_TOP_USER64 -- cgit v1.2.2 From 5b6e9ff6deb703b95fb355bb66d86096c1a2df09 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 30 Aug 2010 19:23:52 +0000 Subject: powerpc/dma: Add optional platform override of dma_set_mask() Some platforms may want to override dma_set_mask() to take into account some specific "features" such as the availability of a direct-map window in addition to an iommu. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/dma-mapping.h | 14 +------------- arch/powerpc/include/asm/machdep.h | 3 +++ 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 8c9c6ad2004e..6d2416a85709 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -127,19 +127,7 @@ static inline int dma_supported(struct device *dev, u64 mask) return dma_ops->dma_supported(dev, mask); } -static inline int dma_set_mask(struct device *dev, u64 dma_mask) -{ - struct dma_map_ops *dma_ops = get_dma_ops(dev); - - if (unlikely(dma_ops == NULL)) - return -EIO; - if (dma_ops->set_dma_mask != NULL) - return dma_ops->set_dma_mask(dev, dma_mask); - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) - return -EIO; - *dev->dma_mask = dma_mask; - return 0; -} +extern int dma_set_mask(struct device *dev, u64 dma_mask); static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index adc8e6cdf339..d045b0145537 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -102,6 +102,9 @@ struct machdep_calls { void (*pci_dma_dev_setup)(struct pci_dev *dev); void (*pci_dma_bus_setup)(struct pci_bus *bus); + /* Platform set_dma_mask override */ + int (*dma_set_mask)(struct device *dev, u64 dma_mask); + int (*probe)(void); void (*setup_arch)(void); /* Optional, may be NULL */ void (*init_early)(void); -- cgit v1.2.2 From 86250b9d12caa1a3dee12a7cf638b7dd70eaadb6 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 25 Aug 2010 18:50:28 +0000 Subject: powerpc: Wire up direct socket system calls This patch wires up the various socket system calls on PowerPC so that userspace can call them directly, rather than by going through the multiplexed socketcall system call. Signed-off-by: Ian Munsie Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/systbl.h | 19 +++++++++++++++++++ arch/powerpc/include/asm/unistd.h | 21 ++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 3d212669a130..aa0f1ebb4aaf 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -329,3 +329,22 @@ COMPAT_SYS(rt_tgsigqueueinfo) SYSCALL(fanotify_init) COMPAT_SYS(fanotify_mark) SYSCALL_SPU(prlimit64) +SYSCALL_SPU(socket) +SYSCALL_SPU(bind) +SYSCALL_SPU(connect) +SYSCALL_SPU(listen) +SYSCALL_SPU(accept) +SYSCALL_SPU(getsockname) +SYSCALL_SPU(getpeername) +SYSCALL_SPU(socketpair) +SYSCALL_SPU(send) +SYSCALL_SPU(sendto) +COMPAT_SYS_SPU(recv) +COMPAT_SYS_SPU(recvfrom) +SYSCALL_SPU(shutdown) +COMPAT_SYS_SPU(setsockopt) +COMPAT_SYS_SPU(getsockopt) +COMPAT_SYS_SPU(sendmsg) +COMPAT_SYS_SPU(recvmsg) +COMPAT_SYS_SPU(recvmmsg) +SYSCALL_SPU(accept4) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 597e6f9d094a..6151937657f6 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -348,10 +348,29 @@ #define __NR_fanotify_init 323 #define __NR_fanotify_mark 324 #define __NR_prlimit64 325 +#define __NR_socket 326 +#define __NR_bind 327 +#define __NR_connect 328 +#define __NR_listen 329 +#define __NR_accept 330 +#define __NR_getsockname 331 +#define __NR_getpeername 332 +#define __NR_socketpair 333 +#define __NR_send 334 +#define __NR_sendto 335 +#define __NR_recv 336 +#define __NR_recvfrom 337 +#define __NR_shutdown 338 +#define __NR_setsockopt 339 +#define __NR_getsockopt 340 +#define __NR_sendmsg 341 +#define __NR_recvmsg 342 +#define __NR_recvmmsg 343 +#define __NR_accept4 344 #ifdef __KERNEL__ -#define __NR_syscalls 326 +#define __NR_syscalls 345 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls -- cgit v1.2.2 From df9ee29270c11dba7d0fe0b83ce47a4d8e8d2101 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 7 Oct 2010 14:08:55 +0100 Subject: Fix IRQ flag handling naming Fix the IRQ flag handling naming. In linux/irqflags.h under one configuration, it maps: local_irq_enable() -> raw_local_irq_enable() local_irq_disable() -> raw_local_irq_disable() local_irq_save() -> raw_local_irq_save() ... and under the other configuration, it maps: raw_local_irq_enable() -> local_irq_enable() raw_local_irq_disable() -> local_irq_disable() raw_local_irq_save() -> local_irq_save() ... This is quite confusing. There should be one set of names expected of the arch, and this should be wrapped to give another set of names that are expected by users of this facility. Change this to have the arch provide: flags = arch_local_save_flags() flags = arch_local_irq_save() arch_local_irq_restore(flags) arch_local_irq_disable() arch_local_irq_enable() arch_irqs_disabled_flags(flags) arch_irqs_disabled() arch_safe_halt() Then linux/irqflags.h wraps these to provide: raw_local_save_flags(flags) raw_local_irq_save(flags) raw_local_irq_restore(flags) raw_local_irq_disable() raw_local_irq_enable() raw_irqs_disabled_flags(flags) raw_irqs_disabled() raw_safe_halt() with type checking on the flags 'arguments', and then wraps those to provide: local_save_flags(flags) local_irq_save(flags) local_irq_restore(flags) local_irq_disable() local_irq_enable() irqs_disabled_flags(flags) irqs_disabled() safe_halt() with tracing included if enabled. The arch functions can now all be inline functions rather than some of them having to be macros. Signed-off-by: David Howells [X86, FRV, MN10300] Signed-off-by: Chris Metcalf [Tile] Signed-off-by: Michal Simek [Microblaze] Tested-by: Catalin Marinas [ARM] Acked-by: Thomas Gleixner Acked-by: Haavard Skinnemoen [AVR] Acked-by: Tony Luck [IA-64] Acked-by: Hirokazu Takata [M32R] Acked-by: Greg Ungerer [M68K/M68KNOMMU] Acked-by: Ralf Baechle [MIPS] Acked-by: Kyle McMartin [PA-RISC] Acked-by: Paul Mackerras [PowerPC] Acked-by: Martin Schwidefsky [S390] Acked-by: Chen Liqin [Score] Acked-by: Matt Fleming [SH] Acked-by: David S. Miller [Sparc] Acked-by: Chris Zankel [Xtensa] Reviewed-by: Richard Henderson [Alpha] Reviewed-by: Yoshinori Sato [H8300] Cc: starvik@axis.com [CRIS] Cc: jesper.nilsson@axis.com [CRIS] Cc: linux-cris-kernel@axis.com --- arch/powerpc/include/asm/hw_irq.h | 113 +++++++++++++++++++++--------------- arch/powerpc/include/asm/irqflags.h | 2 +- 2 files changed, 66 insertions(+), 49 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index bd100fcf40d0..ff08b70b36d4 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -16,42 +16,57 @@ extern void timer_interrupt(struct pt_regs *); #ifdef CONFIG_PPC64 #include -static inline unsigned long local_get_flags(void) +static inline unsigned long arch_local_save_flags(void) { unsigned long flags; - __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (flags) - : "i" (offsetof(struct paca_struct, soft_enabled))); + asm volatile( + "lbz %0,%1(13)" + : "=r" (flags) + : "i" (offsetof(struct paca_struct, soft_enabled))); return flags; } -static inline unsigned long raw_local_irq_disable(void) +static inline unsigned long arch_local_irq_disable(void) { unsigned long flags, zero; - __asm__ __volatile__("li %1,0; lbz %0,%2(13); stb %1,%2(13)" - : "=r" (flags), "=&r" (zero) - : "i" (offsetof(struct paca_struct, soft_enabled)) - : "memory"); + asm volatile( + "li %1,0; lbz %0,%2(13); stb %1,%2(13)" + : "=r" (flags), "=&r" (zero) + : "i" (offsetof(struct paca_struct, soft_enabled)) + : "memory"); return flags; } -extern void raw_local_irq_restore(unsigned long); +extern void arch_local_irq_restore(unsigned long); extern void iseries_handle_interrupts(void); -#define raw_local_irq_enable() raw_local_irq_restore(1) -#define raw_local_save_flags(flags) ((flags) = local_get_flags()) -#define raw_local_irq_save(flags) ((flags) = raw_local_irq_disable()) +static inline void arch_local_irq_enable(void) +{ + arch_local_irq_restore(1); +} + +static inline unsigned long arch_local_irq_save(void) +{ + return arch_local_irq_disable(); +} + +static inline bool arch_irqs_disabled_flags(unsigned long flags) +{ + return flags == 0; +} -#define raw_irqs_disabled() (local_get_flags() == 0) -#define raw_irqs_disabled_flags(flags) ((flags) == 0) +static inline bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} #ifdef CONFIG_PPC_BOOK3E -#define __hard_irq_enable() __asm__ __volatile__("wrteei 1": : :"memory"); -#define __hard_irq_disable() __asm__ __volatile__("wrteei 0": : :"memory"); +#define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory"); +#define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory"); #else #define __hard_irq_enable() __mtmsrd(mfmsr() | MSR_EE, 1) #define __hard_irq_disable() __mtmsrd(mfmsr() & ~MSR_EE, 1) @@ -64,64 +79,66 @@ extern void iseries_handle_interrupts(void); get_paca()->hard_enabled = 0; \ } while(0) -#else +#else /* CONFIG_PPC64 */ -#if defined(CONFIG_BOOKE) #define SET_MSR_EE(x) mtmsr(x) -#define raw_local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory") + +static inline unsigned long arch_local_save_flags(void) +{ + return mfmsr(); +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ +#if defined(CONFIG_BOOKE) + asm volatile("wrtee %0" : : "r" (flags) : "memory"); #else -#define SET_MSR_EE(x) mtmsr(x) -#define raw_local_irq_restore(flags) mtmsr(flags) + mtmsr(flags); #endif +} -static inline void raw_local_irq_disable(void) +static inline unsigned long arch_local_irq_save(void) { + unsigned long flags = arch_local_save_flags(); #ifdef CONFIG_BOOKE - __asm__ __volatile__("wrteei 0": : :"memory"); + asm volatile("wrteei 0" : : : "memory"); #else - unsigned long msr; - - msr = mfmsr(); - SET_MSR_EE(msr & ~MSR_EE); + SET_MSR_EE(flags & ~MSR_EE); #endif + return flags; } -static inline void raw_local_irq_enable(void) +static inline void arch_local_irq_disable(void) { #ifdef CONFIG_BOOKE - __asm__ __volatile__("wrteei 1": : :"memory"); + asm volatile("wrteei 0" : : : "memory"); #else - unsigned long msr; - - msr = mfmsr(); - SET_MSR_EE(msr | MSR_EE); + arch_local_irq_save(); #endif } -static inline void raw_local_irq_save_ptr(unsigned long *flags) +static inline void arch_local_irq_enable(void) { - unsigned long msr; - msr = mfmsr(); - *flags = msr; #ifdef CONFIG_BOOKE - __asm__ __volatile__("wrteei 0": : :"memory"); + asm volatile("wrteei 1" : : : "memory"); #else - SET_MSR_EE(msr & ~MSR_EE); + unsigned long msr = mfmsr(); + SET_MSR_EE(msr | MSR_EE); #endif } -#define raw_local_save_flags(flags) ((flags) = mfmsr()) -#define raw_local_irq_save(flags) raw_local_irq_save_ptr(&flags) -#define raw_irqs_disabled() ((mfmsr() & MSR_EE) == 0) -#define raw_irqs_disabled_flags(flags) (((flags) & MSR_EE) == 0) - -#define hard_irq_disable() raw_local_irq_disable() - -static inline int irqs_disabled_flags(unsigned long flags) +static inline bool arch_irqs_disabled_flags(unsigned long flags) { return (flags & MSR_EE) == 0; } +static inline bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +#define hard_irq_disable() arch_local_irq_disable() + #endif /* CONFIG_PPC64 */ /* diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index 5f68ecfdf516..b85d8ddbb666 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -6,7 +6,7 @@ #ifndef __ASSEMBLY__ /* - * Get definitions for raw_local_save_flags(x), etc. + * Get definitions for arch_local_save_flags(x), etc. */ #include -- cgit v1.2.2 From d8862be1229534aac1768b8ac663e8fb2bb6ddf6 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 10 Sep 2010 09:41:35 +0000 Subject: powerpc/pseries: Export rtas_ibm_suspend_me() Export the rtas_ibm_suspend_me() routine. This is needed to perform partition migration in the kernel. Signed-off-by: Nathan Fontenot Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/rtas.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 3d35f8ae377e..9a1193e30f26 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -187,6 +187,7 @@ extern void rtas_progress(char *s, unsigned short hex); extern void rtas_initialize(void); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); +extern int rtas_ibm_suspend_me(struct rtas_args *); struct rtc_time; extern unsigned long rtas_get_boot_time(void); -- cgit v1.2.2 From dda804ad4023cc202466c46fcfcc163131953838 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:13:19 +0000 Subject: powerpc/pci: Fix return type of BUID_{HI,LO} macros BUID_HI and BUID_LO are used to pass data to call_rtas, which expects ints or u32s. But the macro doesn't cast the return, so the result is still u64. Use the upper_32_bits and lower_32_bits macros that have been added to kernel.h. Found by getting printf format errors trying to debug print the args, no actual code change for 64 bit kernels where the macros are actually used. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Acked-by: Linas Vepstas Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/ppc-pci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 42fdff0e4b32..43268f15004e 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -28,8 +28,8 @@ extern void find_and_init_phbs(void); extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */ /** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */ -#define BUID_HI(buid) ((buid) >> 32) -#define BUID_LO(buid) ((buid) & 0xffffffff) +#define BUID_HI(buid) upper_32_bits(buid) +#define BUID_LO(buid) lower_32_bits(buid) /* PCI device_node operations */ struct device_node; -- cgit v1.2.2 From c71635d288ffd3bcdfb30308f681f9af34f0fc81 Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Thu, 16 Sep 2010 17:58:23 -0500 Subject: powerpc/kexec: make masking/disabling interrupts generic Right now just the kexec crash pathway turns turns off the interrupts. Pull that out and make a generic version for use elsewhere Signed-off-by: Matthew McClintock Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/kexec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 076327f2eff7..f54408d995b5 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -91,6 +91,7 @@ extern void machine_kexec_simple(struct kimage *image); extern void crash_kexec_secondary(struct pt_regs *regs); extern int overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); +extern void machine_kexec_mask_interrupts(void); #else /* !CONFIG_KEXEC */ static inline int kexec_sr_activated(int cpu) { return 0; } -- cgit v1.2.2 From 92437d41374bf59b1914b53bd10ca69d31b1b581 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 24 Sep 2010 12:44:52 -0400 Subject: powerpc: Fix invalid page flags in create TLB CAM path for PTE_64BIT There exists a four line chunk of code, which when configured for 64 bit address space, can incorrectly set certain page flags during the TLB creation. It turns out that this is code which isn't used, but might still serve a purpose. Since it isn't obvious why it exists or why it causes problems, the below description covers both in detail. For powerpc bootstrap, the physical memory (at most 768M), is mapped into the kernel space via the following path: MMU_init() | + adjust_total_lowmem() | + map_mem_in_cams() | + settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0); On settlbcam(), the kernel will create TLB entries according to the flag, PAGE_KERNEL_X. settlbcam() { ... TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); ^ These entries cannot be invalidated by the kernel since MAS1_IPROT is set on TLB property. ... if (flags & _PAGE_USER) { TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); } For classic BookE (flags & _PAGE_USER) is 'zero' so it's fine. But on boards like the the Freescale P4080, we want to support 36-bit physical address on it. So the following options may be set: CONFIG_FSL_BOOKE=y CONFIG_PTE_64BIT=y CONFIG_PHYS_64BIT=y As a result, boards like the P4080 will introduce PTE format as Book3E. As per the file: arch/powerpc/include/asm/pgtable-ppc32.h * #elif defined(CONFIG_FSL_BOOKE) && defined(CONFIG_PTE_64BIT) * #include So PAGE_KERNEL_X is __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) and the book3E version of _PAGE_KERNEL_RWX is defined with: (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX) Note the _PAGE_BAP_SR, which is also defined in the book3E _PAGE_USER: #define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */ So the possibility exists to wrongly assign the user MAS3_U bits to kernel (PAGE_KERNEL_X) address space via the following code fragment: if (flags & _PAGE_USER) { TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); } Here is a dump of the TLB info from Simics with the above code present: ------ L2 TLB1 GT SSS UUU V I Row Logical Physical SS TLPID TID WIMGE XWR XWR F P V ----- ----------------- ------------------- -- ----- ----- ----- --- --- - - - 0 c0000000-cfffffff 000000000-00fffffff 00 0 0 M XWR XWR 0 1 1 1 d0000000-dfffffff 010000000-01fffffff 00 0 0 M XWR XWR 0 1 1 2 e0000000-efffffff 020000000-02fffffff 00 0 0 M XWR XWR 0 1 1 Actually this conditional code was used for two legacy functions: 1: support KGDB to set break point. KGDB already dropped this; now uses its core write to set break point. 2: io_block_mapping() to create TLB in segmentation size (not PAGE_SIZE) for device IO space. This use case is also removed from the latest PowerPC kernel. However, there may still be a use case for it in the future, like large user pages, so we can't remove it entirely. As an alternative, we match on all bits of _PAGE_USER instead of just any bits, so the case where just _PAGE_BAP_SR is set can't sneak through. With this done, the TLB appears without U having XWR as below: ------- L2 TLB1 GT SSS UUU V I Row Logical Physical SS TLPID TID WIMGE XWR XWR F P V ----- ----------------- ------------------- -- ----- ----- ----- --- --- - - - 0 c0000000-cfffffff 000000000-00fffffff 00 0 0 M XWR 0 1 1 1 d0000000-dfffffff 010000000-01fffffff 00 0 0 M XWR 0 1 1 2 e0000000-efffffff 020000000-02fffffff 00 0 0 M XWR 0 1 1 Signed-off-by: Tiejun Chen Signed-off-by: Paul Gortmaker Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/pte-common.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index f2b370180a09..76bb195e4f24 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -171,6 +171,13 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); /* Make modules code happy. We don't set RO yet */ #define PAGE_KERNEL_EXEC PAGE_KERNEL_X +/* + * Don't just check for any non zero bits in __PAGE_USER, since for book3e + * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in + * _PAGE_USER. Need to explictly match _PAGE_BAP_UR bit in that case too. + */ +#define pte_user(val) ((val & _PAGE_USER) == _PAGE_USER) + /* Advertise special mapping type for AGP */ #define PAGE_AGP (PAGE_KERNEL_NC) #define HAVE_PAGE_AGP -- cgit v1.2.2 From 6db92cc9d07db9f713da8554b4bcdfc8e54ad386 Mon Sep 17 00:00:00 2001 From: Harninder Rai Date: Wed, 13 Oct 2010 17:30:56 +0530 Subject: powerpc/85xx: add cache-sram support It adds cache-sram support in P1/P2 QorIQ platforms as under: * A small abstraction over powerpc's remote heap allocator * Exports mpc85xx_cache_sram_alloc()/free() APIs * Supports only one contiguous SRAM window * Drivers can do the following in Kconfig to use these APIs "select FSL_85XX_CACHE_SRAM if MPC85xx" * Required SRAM size and the offset where SRAM should be mapped must be provided at kernel command line as : cache-sram-size= cache-sram-offset= Signed-off-by: Harninder Rai Signed-off-by: Vivek Mahajan Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/fsl_85xx_cache_sram.h | 48 ++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 arch/powerpc/include/asm/fsl_85xx_cache_sram.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/fsl_85xx_cache_sram.h b/arch/powerpc/include/asm/fsl_85xx_cache_sram.h new file mode 100644 index 000000000000..2af2bdc37b2e --- /dev/null +++ b/arch/powerpc/include/asm/fsl_85xx_cache_sram.h @@ -0,0 +1,48 @@ +/* + * Copyright 2009 Freescale Semiconductor, Inc. + * + * Cache SRAM handling for QorIQ platform + * + * Author: Vivek Mahajan + + * This file is derived from the original work done + * by Sylvain Munaut for the Bestcomm SRAM allocator. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __ASM_POWERPC_FSL_85XX_CACHE_SRAM_H__ +#define __ASM_POWERPC_FSL_85XX_CACHE_SRAM_H__ + +#include +#include + +/* + * Cache-SRAM + */ + +struct mpc85xx_cache_sram { + phys_addr_t base_phys; + void *base_virt; + unsigned int size; + rh_info_t *rh; + spinlock_t lock; +}; + +extern void mpc85xx_cache_sram_free(void *ptr); +extern void *mpc85xx_cache_sram_alloc(unsigned int size, + phys_addr_t *phys, unsigned int align); + +#endif /* __AMS_POWERPC_FSL_85XX_CACHE_SRAM_H__ */ -- cgit v1.2.2 From 988cf86d4f0da4150e808300c145ba87c0aad02f Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 8 Oct 2010 02:13:25 -0500 Subject: powerpc/fsl-booke: Add support for FSL Arch v1.0 MMU in setup_page_sizes Update setup_page_sizes() to support for a MMU v1.0 FSL style MMU implementation. In such a processor, we don't have TLB0PS or EPTCFG registers (and access to these registers may cause exceptions). We need to parse the older format of TLBnCFG for page size support. Additionaly, assume since we are an FSL implementation that we have 2 TLB arrays and the second array contains the variable size pages. Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/mmu-book3e.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 87a1d787c5b6..8eaed81ea642 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -114,6 +114,17 @@ #define MAS7_RPN 0xFFFFFFFF +/* Bit definitions for MMUCFG */ +#define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */ +#define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */ +#define MMUCFG_MAVN_V2 0x00000001 /* v2.0 */ +#define MMUCFG_NTLBS 0x0000000c /* Number of TLBs */ +#define MMUCFG_PIDSIZE 0x000007c0 /* PID Reg Size */ +#define MMUCFG_TWC 0x00008000 /* TLB Write Conditional (v2.0) */ +#define MMUCFG_LRAT 0x00010000 /* LRAT Supported (v2.0) */ +#define MMUCFG_RASIZE 0x00fe0000 /* Real Addr Size */ +#define MMUCFG_LPIDSIZE 0x0f000000 /* LPID Reg Size */ + /* Bit definitions for MMUCSR0 */ #define MMUCSR0_TLB1FI 0x00000002 /* TLB1 Flash invalidate */ #define MMUCSR0_TLB0FI 0x00000004 /* TLB0 Flash invalidate */ @@ -133,6 +144,10 @@ #define TLBnCFG_GTWE 0x00010000 /* Guest can write */ #define TLBnCFG_IND 0x00020000 /* IND entries supported */ #define TLBnCFG_PT 0x00040000 /* Can load from page table */ +#define TLBnCFG_MINSIZE 0x00f00000 /* Minimum Page Size (v1.0) */ +#define TLBnCFG_MINSIZE_SHIFT 20 +#define TLBnCFG_MAXSIZE 0x000f0000 /* Maximum Page Size (v1.0) */ +#define TLBnCFG_MAXSIZE_SHIFT 16 #define TLBnCFG_ASSOC 0xff000000 /* Associativity */ /* TLBnPS encoding */ -- cgit v1.2.2 From 50a23e6eec6f20d55a3a920e47adb455bff6046e Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Sat, 16 Oct 2010 10:36:23 -0700 Subject: Update broken web addresses in arch directory. The patch below updates broken web addresses in the arch directory. Signed-off-by: Justin P. Mattock Signed-off-by: Maciej W. Rozycki Cc: Finn Thain Cc: Randy Dunlap Reviewed-by: Finn Thain Signed-off-by: Jiri Kosina --- arch/powerpc/include/asm/hydra.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index 1ad4eed07fbe..5b0c98bd46ab 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -10,7 +10,7 @@ * * © Copyright 1995 Apple Computer, Inc. All rights reserved. * - * It's available online from http://chrp.apple.com/MacTech.pdf. + * It's available online from http://www.cpu.lu/~mlan/ftp/MacTech.pdf * You can obtain paper copies of this book from computer bookstores or by * writing Morgan Kaufmann Publishers, Inc., 340 Pine Street, Sixth Floor, San * Francisco, CA 94104. Reference ISBN 1-55860-393-X. -- cgit v1.2.2 From e360adbe29241a0194e10e20595360dd7b98a2b3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 14:01:34 +0800 Subject: irq_work: Add generic hardirq context callbacks Provide a mechanism that allows running code in IRQ context. It is most useful for NMI code that needs to interact with the rest of the system -- like wakeup a task to drain buffers. Perf currently has such a mechanism, so extract that and provide it as a generic feature, independent of perf so that others may also benefit. The IRQ context callback is generated through self-IPIs where possible, or on architectures like powerpc the decrementer (the built-in timer facility) is set to generate an interrupt immediately. Architectures that don't have anything like this get to do with a callback from the timer tick. These architectures can call irq_work_run() at the tail of any IRQ handlers that might enqueue such work (like the perf IRQ handler) to avoid undue latencies in processing the work. Signed-off-by: Peter Zijlstra Acked-by: Kyle McMartin Acked-by: Martin Schwidefsky [ various fixes ] Signed-off-by: Huang Ying LKML-Reference: <1287036094.7768.291.camel@yhuang-dev> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/paca.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1ff6662f7faf..9b287fdd8ea3 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -129,7 +129,7 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ - u8 perf_event_pending; /* PM interrupt while soft-disabled */ + u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ -- cgit v1.2.2 From e1e10a265d28273ab8c70be19d43dcbdeead6c5a Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Mon, 4 Oct 2010 17:03:17 -0700 Subject: sched: Consolidate account_system_vtime extern declaration Just a minor cleanup patch that makes things easier to the following patches. No functionality change in this patch. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra LKML-Reference: <1286237003-12406-3-git-send-email-venki@google.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/system.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index 6c294acac848..9c3d160670b4 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -542,10 +542,6 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -extern void account_system_vtime(struct task_struct *); -#endif - extern struct dentry *powerpc_debugfs_root; #endif /* __KERNEL__ */ -- cgit v1.2.2 From 96bc451a153297bf1f99ef2d633d512ea349ae7a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:42 +0200 Subject: KVM: PPC: Introduce shared page For transparent variable sharing between the hypervisor and guest, I introduce a shared page. This shared page will contain all the registers the guest can read and write safely without exiting guest context. This patch only implements the stubs required for the basic structure of the shared page. The actual register moving follows. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/include/asm/kvm_para.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index b0b23c007d6e..53edacdf6940 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #define KVM_MAX_VCPUS 1 @@ -290,6 +291,7 @@ struct kvm_vcpu_arch { struct tasklet_struct tasklet; u64 dec_jiffies; unsigned long pending_exceptions; + struct kvm_vcpu_arch_shared *shared; #ifdef CONFIG_PPC_BOOK3S struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 2d48f6a63d0b..1485ba87a52a 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -20,6 +20,11 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ +#include + +struct kvm_vcpu_arch_shared { +}; + #ifdef __KERNEL__ static inline int kvm_para_available(void) -- cgit v1.2.2 From 666e7252a15b7fc4a116e65deaf6da5e4ce660e3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:43 +0200 Subject: KVM: PPC: Convert MSR to shared page One of the most obvious registers to share with the guest directly is the MSR. The MSR contains the "interrupts enabled" flag which the guest has to toggle in critical sections. So in order to bring the overhead of interrupt en- and disabling down, let's put msr into the shared page. Keep in mind that even though you can fully read its contents, writing to it doesn't always update all state. There are a few safe fields that don't require hypervisor interaction. See the documentation for a list of MSR bits that are safe to be set from inside the guest. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/include/asm/kvm_para.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 53edacdf6940..ba20f90655f3 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -211,7 +211,6 @@ struct kvm_vcpu_arch { u32 cr; #endif - ulong msr; #ifdef CONFIG_PPC_BOOK3S ulong shadow_msr; ulong hflags; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 1485ba87a52a..a17dc5229d99 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -23,6 +23,7 @@ #include struct kvm_vcpu_arch_shared { + __u64 msr; }; #ifdef __KERNEL__ -- cgit v1.2.2 From d562de48de68b60b3d2522e7d8273d7112034ee6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:44 +0200 Subject: KVM: PPC: Convert DSISR to shared page The DSISR register contains information about a data page fault. It is fully read/write from inside the guest context and we don't need to worry about interacting based on writes of this register. This patch converts all users of the current field to the shared page. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_book3s.h | 1 - arch/powerpc/include/asm/kvm_para.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 8274a2d43925..b5b196166455 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -85,7 +85,6 @@ struct kvmppc_vcpu_book3s { u64 hid[6]; u64 gqr[8]; int slb_nr; - u32 dsisr; u64 sdr1; u64 hior; u64 msr_mask; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index a17dc5229d99..9f7565b1de65 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -24,6 +24,7 @@ struct kvm_vcpu_arch_shared { __u64 msr; + __u32 dsisr; }; #ifdef __KERNEL__ -- cgit v1.2.2 From 5e030186dfc4e4e47c84d2557b17e4aa06c76f96 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:45 +0200 Subject: KVM: PPC: Convert DAR to shared page. The DAR register contains the address a data page fault occured at. This register behaves pretty much like a simple data storage register that gets written to on data faults. There is no hypervisor interaction required on read or write. This patch converts all users of the current field to the shared page. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/include/asm/kvm_para.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index ba20f90655f3..c852408eac38 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -231,7 +231,6 @@ struct kvm_vcpu_arch { ulong csrr1; ulong dsrr0; ulong dsrr1; - ulong dear; ulong esr; u32 dec; u32 decar; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 9f7565b1de65..ec72a1c8c045 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -23,6 +23,7 @@ #include struct kvm_vcpu_arch_shared { + __u64 dar; __u64 msr; __u32 dsisr; }; -- cgit v1.2.2 From de7906c36ca1e22a3e3600e95c6a4e2c1e4e2e9c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:46 +0200 Subject: KVM: PPC: Convert SRR0 and SRR1 to shared page The SRR0 and SRR1 registers contain cached values of the PC and MSR respectively. They get written to by the hypervisor when an interrupt occurs or directly by the kernel. They are also used to tell the rfi(d) instruction where to jump to. Because it only gets touched on defined events that, it's very simple to share with the guest. Hypervisor and guest both have full r/w access. This patch converts all users of the current field to the shared page. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 2 -- arch/powerpc/include/asm/kvm_para.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index c852408eac38..5255d754f9a9 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -225,8 +225,6 @@ struct kvm_vcpu_arch { ulong sprg5; ulong sprg6; ulong sprg7; - ulong srr0; - ulong srr1; ulong csrr0; ulong csrr1; ulong dsrr0; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index ec72a1c8c045..d7fc6c2c9730 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -23,6 +23,8 @@ #include struct kvm_vcpu_arch_shared { + __u64 srr0; + __u64 srr1; __u64 dar; __u64 msr; __u32 dsisr; -- cgit v1.2.2 From a73a9599e03eef1324d5aeecaebc1b339d2e1664 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:47 +0200 Subject: KVM: PPC: Convert SPRG[0-4] to shared page When in kernel mode there are 4 additional registers available that are simple data storage. Instead of exiting to the hypervisor to read and write those, we can just share them with the guest using the page. This patch converts all users of the current field to the shared page. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 4 ---- arch/powerpc/include/asm/kvm_para.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5255d754f9a9..221cf85e9a6e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -217,10 +217,6 @@ struct kvm_vcpu_arch { ulong guest_owned_ext; #endif u32 mmucr; - ulong sprg0; - ulong sprg1; - ulong sprg2; - ulong sprg3; ulong sprg4; ulong sprg5; ulong sprg6; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index d7fc6c2c9730..e402999ba193 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -23,6 +23,10 @@ #include struct kvm_vcpu_arch_shared { + __u64 sprg0; + __u64 sprg1; + __u64 sprg2; + __u64 sprg3; __u64 srr0; __u64 srr1; __u64 dar; -- cgit v1.2.2 From 2a342ed57756ad5d8af5456959433884367e5ab2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:48 +0200 Subject: KVM: PPC: Implement hypervisor interface To communicate with KVM directly we need to plumb some sort of interface between the guest and KVM. Usually those interfaces use hypercalls. This hypercall implementation is described in the last patch of the series in a special documentation file. Please read that for further information. This patch implements stubs to handle KVM PPC hypercalls on the host and guest side alike. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_para.h | 114 +++++++++++++++++++++++++++++++++++- arch/powerpc/include/asm/kvm_ppc.h | 1 + 2 files changed, 114 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index e402999ba193..556fd59ee0f1 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -21,6 +21,7 @@ #define __POWERPC_KVM_PARA_H__ #include +#include struct kvm_vcpu_arch_shared { __u64 sprg0; @@ -34,16 +35,127 @@ struct kvm_vcpu_arch_shared { __u32 dsisr; }; +#define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */ +#define HC_VENDOR_KVM (42 << 16) +#define HC_EV_SUCCESS 0 +#define HC_EV_UNIMPLEMENTED 12 + #ifdef __KERNEL__ +#ifdef CONFIG_KVM_GUEST + +static inline int kvm_para_available(void) +{ + struct device_node *hyper_node; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return 0; + + if (!of_device_is_compatible(hyper_node, "linux,kvm")) + return 0; + + return 1; +} + +extern unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr); + +#else + static inline int kvm_para_available(void) { return 0; } +static unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + return HC_EV_UNIMPLEMENTED; +} + +#endif + +static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) +{ + unsigned long in[8]; + unsigned long out[8]; + unsigned long r; + + r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + *r2 = out[0]; + + return r; +} + +static inline long kvm_hypercall0(unsigned int nr) +{ + unsigned long in[8]; + unsigned long out[8]; + + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, + unsigned long p2) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + in[2] = p3; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + in[2] = p3; + in[3] = p4; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + + static inline unsigned int kvm_arch_para_features(void) { - return 0; + unsigned long r; + + if (!kvm_para_available()) + return 0; + + if(kvm_hypercall0_1(KVM_HC_FEATURES, &r)) + return 0; + + return r; } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 18d139ec2d22..ecb3bc74c344 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -107,6 +107,7 @@ extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); +extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); /* * Cuts out inst bits with ordering according to spec. -- cgit v1.2.2 From 5c6cedf488a1144ac4f683f3ea1a642533d1dcd2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:49 +0200 Subject: KVM: PPC: Add PV guest critical sections When running in hooked code we need a way to disable interrupts without clobbering any interrupts or exiting out to the hypervisor. To achieve this, we have an additional critical field in the shared page. If that field is equal to the r1 register of the guest, it tells the hypervisor that we're in such a critical section and thus may not receive any interrupts. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_para.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 556fd59ee0f1..4577e7b6dff1 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -24,6 +24,7 @@ #include struct kvm_vcpu_arch_shared { + __u64 critical; /* Guest may not get interrupts if == r1 */ __u64 sprg0; __u64 sprg1; __u64 sprg2; -- cgit v1.2.2 From fad93fe1d452960eb838109222cc949eb77f2859 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:50 +0200 Subject: KVM: PPC: Add PV guest scratch registers While running in hooked code we need to store register contents out because we must not clobber any registers. So let's add some fields to the shared page we can just happily write to. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_para.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 4577e7b6dff1..5be00c9533d2 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -24,6 +24,9 @@ #include struct kvm_vcpu_arch_shared { + __u64 scratch1; + __u64 scratch2; + __u64 scratch3; __u64 critical; /* Guest may not get interrupts if == r1 */ __u64 sprg0; __u64 sprg1; -- cgit v1.2.2 From 90bba358873dc96a6746f0df453a0a8ca3d6b86e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:51 +0200 Subject: KVM: PPC: Tell guest about pending interrupts When the guest turns on interrupts again, it needs to know if we have an interrupt pending for it. Because if so, it should rather get out of guest context and get the interrupt. So we introduce a new field in the shared page that we use to tell the guest that there's a pending interrupt lying around. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_para.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 5be00c9533d2..0653b0d238b4 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -37,6 +37,7 @@ struct kvm_vcpu_arch_shared { __u64 dar; __u64 msr; __u32 dsisr; + __u32 int_pending; /* Tells the guest if we have an interrupt */ }; #define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */ -- cgit v1.2.2 From 28e83b4fa7f8bd114940fa933ac8cbe80969eba2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:52 +0200 Subject: KVM: PPC: Make PAM a define On PowerPC it's very normal to not support all of the physical RAM in real mode. To check if we're matching on the shared page or not, we need to know the limits so we can restrain ourselves to that range. So let's make it a define instead of open-coding it. And while at it, let's also increase it. Signed-off-by: Alexander Graf v2 -> v3: - RMO -> PAM (non-magic page) Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 221cf85e9a6e..1674da8134cb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -48,6 +48,9 @@ #define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) #define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) +/* Physical Address Mask - allowed range of real mode RAM access */ +#define KVM_PAM 0x0fffffffffffffffULL + struct kvm; struct kvm_run; struct kvm_vcpu; -- cgit v1.2.2 From beb03f14da9ceff76ff08cbb8af064b52dc21f7e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:53 +0200 Subject: KVM: PPC: First magic page steps We will be introducing a method to project the shared page in guest context. As soon as we're talking about this coupling, the shared page is colled magic page. This patch introduces simple defines, so the follow-up patches are easier to read. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1674da8134cb..e1da77579e65 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -287,6 +287,8 @@ struct kvm_vcpu_arch { u64 dec_jiffies; unsigned long pending_exceptions; struct kvm_vcpu_arch_shared *shared; + unsigned long magic_page_pa; /* phys addr to map the magic page to */ + unsigned long magic_page_ea; /* effect. addr to map the magic page to */ #ifdef CONFIG_PPC_BOOK3S struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; -- cgit v1.2.2 From e8508940a88691ad3d1c46608cd968eb4be9cbc5 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:54 +0200 Subject: KVM: PPC: Magic Page Book3s support We need to override EA as well as PA lookups for the magic page. When the guest tells us to project it, the magic page overrides any guest mappings. In order to reflect that, we need to hook into all the MMU layers of KVM to force map the magic page if necessary. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_book3s.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index b5b196166455..00cf8b07e502 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -130,6 +130,7 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); extern u32 kvmppc_trampoline_lowmem; extern u32 kvmppc_trampoline_enter; -- cgit v1.2.2 From 5fc87407b55f5799418f4dc5931232c2bc06d077 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:55 +0200 Subject: KVM: PPC: Expose magic page support to guest Now that we have the shared page in place and the MMU code knows about the magic page, we can expose that capability to the guest! Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_para.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 0653b0d238b4..7438ab360120 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -45,6 +45,8 @@ struct kvm_vcpu_arch_shared { #define HC_EV_SUCCESS 0 #define HC_EV_UNIMPLEMENTED 12 +#define KVM_FEATURE_MAGIC_PAGE 1 + #ifdef __KERNEL__ #ifdef CONFIG_KVM_GUEST -- cgit v1.2.2 From 2e0908afaf03675d22e40ce45a66b8d2070214ac Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 15:04:17 +0200 Subject: KVM: PPC: RCU'ify the Book3s MMU So far we've been running all code without locking of any sort. This wasn't really an issue because I didn't see any parallel access to the shadow MMU code coming. But then I started to implement dirty bitmapping to MOL which has the video code in its own thread, so suddenly we had the dirty bitmap code run in parallel to the shadow mmu code. And with that came trouble. So I went ahead and made the MMU modifying functions as parallelizable as I could think of. I hope I didn't screw up too much RCU logic :-). If you know your way around RCU and locking and what needs to be done when, please take a look at this patch. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e1da77579e65..fafc71aa3343 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -165,6 +165,7 @@ struct hpte_cache { struct hlist_node list_pte; struct hlist_node list_vpte; struct hlist_node list_vpte_long; + struct rcu_head rcu_head; u64 host_va; u64 pfn; ulong slot; @@ -295,6 +296,7 @@ struct kvm_vcpu_arch { struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; int hpte_cache_count; + spinlock_t mmu_lock; #endif }; -- cgit v1.2.2 From 2d27fc5eac0205588cb59ae138062e5e96695276 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 15:04:19 +0200 Subject: KVM: PPC: Add book3s_32 tlbie flush acceleration On Book3s_32 the tlbie instruction flushed effective addresses by the mask 0x0ffff000. This is pretty hard to reflect with a hash that hashes ~0xfff, so to speed up that target we should also keep a special hash around for it. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index fafc71aa3343..bba3b9b72a39 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -42,9 +42,11 @@ #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 +#define HPTEG_HASH_BITS_PTE_LONG 12 #define HPTEG_HASH_BITS_VPTE 13 #define HPTEG_HASH_BITS_VPTE_LONG 5 #define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE) +#define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG) #define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) #define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) @@ -163,6 +165,7 @@ struct kvmppc_mmu { struct hpte_cache { struct hlist_node list_pte; + struct hlist_node list_pte_long; struct hlist_node list_vpte; struct hlist_node list_vpte_long; struct rcu_head rcu_head; @@ -293,6 +296,7 @@ struct kvm_vcpu_arch { #ifdef CONFIG_PPC_BOOK3S struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; + struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; int hpte_cache_count; -- cgit v1.2.2 From 2b05d71fefc3b83e686bead355c6d35e440c4261 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 15:04:21 +0200 Subject: KVM: PPC: Make long relocations be ulong On Book3S KVM we directly expose some asm pointers to C code as variables. These need to be relocated and thus break on relocatable kernels. To make sure we can at least build, let's mark them as long instead of u32 where 64bit relocations don't work. This fixes the following build error: WARNING: 2 bad relocations^M > c000000000008590 R_PPC64_ADDR32 .text+0x4000000000008460^M > c000000000008594 R_PPC64_ADDR32 .text+0x4000000000008598^M Please keep in mind that actually using KVM on a relocated kernel might still break. This only fixes the compile problem. Reported-by: Subrata Modak Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_book3s.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 00cf8b07e502..f04f516c97da 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -132,8 +132,8 @@ extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); -extern u32 kvmppc_trampoline_lowmem; -extern u32 kvmppc_trampoline_enter; +extern ulong kvmppc_trampoline_lowmem; +extern ulong kvmppc_trampoline_enter; extern void kvmppc_rmcall(ulong srr0, ulong srr1); extern void kvmppc_load_up_fpu(void); extern void kvmppc_load_up_altivec(void); -- cgit v1.2.2 From 7508e16c9f2a20f7721d7bc47c33a7b34c873a2c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 3 Aug 2010 11:32:56 +0200 Subject: KVM: PPC: Add feature bitmap for magic page We will soon add SR PV support to the shared page, so we need some infrastructure that allows the guest to query for features KVM exports. This patch adds a second return value to the magic mapping that indicated to the guest which features are available. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_para.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 7438ab360120..43c1b2260af8 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -47,6 +47,8 @@ struct kvm_vcpu_arch_shared { #define KVM_FEATURE_MAGIC_PAGE 1 +#define KVM_MAGIC_FEAT_SR (1 << 0) + #ifdef __KERNEL__ #ifdef CONFIG_KVM_GUEST -- cgit v1.2.2 From 8e8651783ff2458f31098be7c2abacf2fcab054a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 3 Aug 2010 01:06:11 +0200 Subject: KVM: PPC: Interpret SR registers on demand Right now we're examining the contents of Book3s_32's segment registers when the register is written and put the interpreted contents into a struct. There are two reasons this is bad. For starters, the struct has worse real-time performance, as it occupies more ram. But the more important part is that with segment registers being interpreted from their raw values, we can put them in the shared page, allowing guests to mess with them directly. This patch makes the internal representation of SRs be u32s. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f04f516c97da..08846520220c 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -38,15 +38,6 @@ struct kvmppc_slb { bool class : 1; }; -struct kvmppc_sr { - u32 raw; - u32 vsid; - bool Ks : 1; - bool Kp : 1; - bool nx : 1; - bool valid : 1; -}; - struct kvmppc_bat { u64 raw; u32 bepi; @@ -79,7 +70,7 @@ struct kvmppc_vcpu_book3s { u64 vsid; } slb_shadow[64]; u8 slb_shadow_max; - struct kvmppc_sr sr[16]; + u32 sr[16]; struct kvmppc_bat ibat[8]; struct kvmppc_bat dbat[8]; u64 hid[6]; -- cgit v1.2.2 From df1bfa25d81f9451715ccbbb67551e0f792ceec8 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 3 Aug 2010 02:29:27 +0200 Subject: KVM: PPC: Put segment registers in shared page Now that the actual mtsr doesn't do anything anymore, we can move the sr contents over to the shared page, so a guest can directly read and write its sr contents from guest context. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 1 - arch/powerpc/include/asm/kvm_para.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 08846520220c..be8aac24ba83 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -70,7 +70,6 @@ struct kvmppc_vcpu_book3s { u64 vsid; } slb_shadow[64]; u8 slb_shadow_max; - u32 sr[16]; struct kvmppc_bat ibat[8]; struct kvmppc_bat dbat[8]; u64 hid[6]; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 43c1b2260af8..d79fd0910964 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -38,6 +38,7 @@ struct kvm_vcpu_arch_shared { __u64 msr; __u32 dsisr; __u32 int_pending; /* Tells the guest if we have an interrupt */ + __u32 sr[16]; }; #define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */ -- cgit v1.2.2 From 8b6db3bc965c204db6868d4005808b4fdc9c46d7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 15 Aug 2010 08:04:24 +0200 Subject: KVM: PPC: Implement correct SID mapping on Book3s_32 Up until now we were doing segment mappings wrong on Book3s_32. For Book3s_64 we were using a trick where we know that a single mmu_context gives us 16 bits of context ids. The mm system on Book3s_32 instead uses a clever algorithm to distribute VSIDs across the available range, so a context id really only gives us 16 available VSIDs. To keep at least a few guest processes in the SID shadow, let's map a number of contexts that we can use as VSID pool. This makes the code be actually correct and shouldn't hurt performance too much. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index be8aac24ba83..d62e703f1214 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -60,6 +60,13 @@ struct kvmppc_sid_map { #define SID_MAP_NUM (1 << SID_MAP_BITS) #define SID_MAP_MASK (SID_MAP_NUM - 1) +#ifdef CONFIG_PPC_BOOK3S_64 +#define SID_CONTEXTS 1 +#else +#define SID_CONTEXTS 128 +#define VSID_POOL_SIZE (SID_CONTEXTS * 16) +#endif + struct kvmppc_vcpu_book3s { struct kvm_vcpu vcpu; struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; @@ -78,10 +85,14 @@ struct kvmppc_vcpu_book3s { u64 sdr1; u64 hior; u64 msr_mask; - u64 vsid_first; u64 vsid_next; +#ifdef CONFIG_PPC_BOOK3S_32 + u32 vsid_pool[VSID_POOL_SIZE]; +#else + u64 vsid_first; u64 vsid_max; - int context_id; +#endif + int context_id[SID_CONTEXTS]; ulong prog_flags; /* flags to inject when giving a 700 trap */ }; -- cgit v1.2.2 From 17bd158006a33615270f9dba15c62f49bd447435 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 30 Aug 2010 10:44:15 +0200 Subject: KVM: PPC: Implement Level interrupts on Book3S The current interrupt logic is just completely broken. We get a notification from user space, telling us that an interrupt is there. But then user space expects us that we just acknowledge an interrupt once we deliver it to the guest. This is not how real hardware works though. On real hardware, the interrupt controller pulls the external interrupt line until it gets notified that the interrupt was received. So in reality we have two events: pulling and letting go of the interrupt line. To maintain backwards compatibility, I added a new request for the pulling part. The letting go part was implemented earlier already. With this in place, we can now finally start guests that do not randomly stall and stop to work at random times. This patch implements above logic for Book3S. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm.h | 1 + arch/powerpc/include/asm/kvm_asm.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 6c5547d82bbe..18ea6963ad77 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -86,5 +86,6 @@ struct kvm_guest_debug_arch { #define KVM_INTERRUPT_SET -1U #define KVM_INTERRUPT_UNSET -2U +#define KVM_INTERRUPT_SET_LEVEL -3U #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index c5ea4cda34b3..5b7504674397 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -58,6 +58,7 @@ #define BOOK3S_INTERRUPT_INST_STORAGE 0x400 #define BOOK3S_INTERRUPT_INST_SEGMENT 0x480 #define BOOK3S_INTERRUPT_EXTERNAL 0x500 +#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501 #define BOOK3S_INTERRUPT_ALIGNMENT 0x600 #define BOOK3S_INTERRUPT_PROGRAM 0x700 #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 @@ -84,7 +85,8 @@ #define BOOK3S_IRQPRIO_EXTERNAL 13 #define BOOK3S_IRQPRIO_DECREMENTER 14 #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 15 -#define BOOK3S_IRQPRIO_MAX 16 +#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 16 +#define BOOK3S_IRQPRIO_MAX 17 #define BOOK3S_HFLAG_DCBZ32 0x1 #define BOOK3S_HFLAG_SLB 0x2 -- cgit v1.2.2 From 26e673c3003bc8f24bdbbdcb8bc91a78556f579a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 3 Sep 2010 10:22:19 +0200 Subject: KVM: PPC: Move of include to __KERNEL__ section We have to protect the include for linux/of.h by __KERNEL__ so it doesn't accidently get referenced outside. This patch fixes this and makes the tree compile again. Reported-by: Stephen Rothwell Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_para.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index d79fd0910964..50533f9adf40 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -21,7 +21,6 @@ #define __POWERPC_KVM_PARA_H__ #include -#include struct kvm_vcpu_arch_shared { __u64 scratch1; @@ -54,6 +53,8 @@ struct kvm_vcpu_arch_shared { #ifdef CONFIG_KVM_GUEST +#include + static inline int kvm_para_available(void) { struct device_node *hyper_node; -- cgit v1.2.2