From 7ebcfcf19723254ebe90cdf8718436b9955a9a01 Mon Sep 17 00:00:00 2001 From: Jonas Larsson Date: Tue, 31 Mar 2009 11:16:52 +0200 Subject: avr32: Solves problem with inverted MCI detect pin on Merisc board Same patch as before, modified to use bool. This patch solves the problem with the inverted mci detect pin on Merisc boards. Signed-off-by: Jonas Larsson Signed-off-by: Haavard Skinnemoen --- arch/avr32/boards/merisc/setup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/avr32/boards/merisc/setup.c b/arch/avr32/boards/merisc/setup.c index 20b300cf105a..623b077594fc 100644 --- a/arch/avr32/boards/merisc/setup.c +++ b/arch/avr32/boards/merisc/setup.c @@ -94,9 +94,10 @@ static struct spi_board_info __initdata spi0_board_info[] = { static struct mci_platform_data __initdata mci0_data = { .slot[0] = { - .bus_width = 4, - .detect_pin = GPIO_PIN_PE(19), - .wp_pin = GPIO_PIN_PE(20), + .bus_width = 4, + .detect_pin = GPIO_PIN_PE(19), + .wp_pin = GPIO_PIN_PE(20), + .detect_is_active_high = true, }, }; -- cgit v1.2.2 From 334034c132017fa662716b70591b2297ed7f238c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 May 2009 13:37:11 -0700 Subject: avr32: remove obsolete hw_interrupt_type The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t) have been kept around for migration reasons. After more than two years it's time to remove them finally. This patch cleans up one of the remaining users. When all such patches hit mainline we can remove the defines and typedefs finally. Impact: cleanup Convert the last remaining users to struct irq_chip and remove the define. Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Haavard Skinnemoen --- arch/avr32/include/asm/hw_irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/avr32/include/asm/hw_irq.h b/arch/avr32/include/asm/hw_irq.h index 218b0a6bfd1b..a36f9fcb8fcd 100644 --- a/arch/avr32/include/asm/hw_irq.h +++ b/arch/avr32/include/asm/hw_irq.h @@ -1,7 +1,7 @@ #ifndef __ASM_AVR32_HW_IRQ_H #define __ASM_AVR32_HW_IRQ_H -static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) +static inline void hw_resend_irq(struct irq_chip *h, unsigned int i) { /* Nothing to do */ } -- cgit v1.2.2 From 873a2e89c5e32a5c72a9ece76fcb871358ae22d2 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 11 May 2009 15:37:26 +0200 Subject: microblaze: Remove POWERPC reference from Microblaze gpio.h Signed-off-by: Michal Simek --- arch/microblaze/include/asm/gpio.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/gpio.h b/arch/microblaze/include/asm/gpio.h index ea04632399d8..2345ac354d9b 100644 --- a/arch/microblaze/include/asm/gpio.h +++ b/arch/microblaze/include/asm/gpio.h @@ -11,8 +11,8 @@ * (at your option) any later version. */ -#ifndef __ASM_POWERPC_GPIO_H -#define __ASM_POWERPC_GPIO_H +#ifndef _ASM_MICROBLAZE_GPIO_H +#define _ASM_MICROBLAZE_GPIO_H #include #include @@ -53,4 +53,4 @@ static inline int irq_to_gpio(unsigned int irq) #endif /* CONFIG_GPIOLIB */ -#endif /* __ASM_POWERPC_GPIO_H */ +#endif /* _ASM_MICROBLAZE_GPIO_H */ -- cgit v1.2.2 From 6fa612b56c575a5235568593eab4240c90608630 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 11 May 2009 15:49:12 +0200 Subject: microblaze: Kconfig: Enable drivers for Microblaze Signed-off-by: Michal Simek --- arch/microblaze/Kconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 8cc312b5d4dc..e6af7dd01fe6 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -6,6 +6,7 @@ mainmenu "Linux/Microblaze Kernel Configuration" config MICROBLAZE def_bool y select HAVE_LMB + select ARCH_WANT_OPTIONAL_GPIOLIB config SWAP def_bool n @@ -49,6 +50,9 @@ config GENERIC_CLOCKEVENTS config GENERIC_HARDIRQS_NO__DO_IRQ def_bool y +config GENERIC_GPIO + def_bool y + config PCI depends on !MMU def_bool n @@ -105,9 +109,6 @@ config CMDLINE_FORCE config OF def_bool y -config OF_DEVICE - def_bool y - config PROC_DEVICETREE bool "Support for device tree in /proc" depends on PROC_FS -- cgit v1.2.2 From 5af7fa68103e7b2efb0fd9d901b1c25bad96fd21 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 1 May 2009 21:48:15 +0000 Subject: microblaze: export some symbols Some device drivers require the symbols _ebss, kernel_thread, __page_offset or ___range_ok, so export them. Signed-off-by: Arnd Bergmann Signed-off-by: Michal Simek --- arch/microblaze/kernel/microblaze_ksyms.c | 2 ++ arch/microblaze/kernel/process.c | 1 + arch/microblaze/mm/init.c | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/microblaze_ksyms.c b/arch/microblaze/kernel/microblaze_ksyms.c index 5f71790e3c3c..59ff20e33e0c 100644 --- a/arch/microblaze/kernel/microblaze_ksyms.c +++ b/arch/microblaze/kernel/microblaze_ksyms.c @@ -45,3 +45,5 @@ extern void __udivsi3(void); EXPORT_SYMBOL(__udivsi3); extern void __umodsi3(void); EXPORT_SYMBOL(__umodsi3); +extern char *_ebss; +EXPORT_SYMBOL_GPL(_ebss); diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 07d4fa339eda..685ad71ced50 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -173,6 +173,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); } +EXPORT_SYMBOL_GPL(kernel_thread); unsigned long get_wchan(struct task_struct *p) { diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index b0c8213cd6cf..31ec053c1dd6 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -24,7 +24,7 @@ #include unsigned int __page_offset; -/* EXPORT_SYMBOL(__page_offset); */ +EXPORT_SYMBOL(__page_offset); char *klimit = _end; @@ -199,3 +199,4 @@ int ___range_ok(unsigned long addr, unsigned long size) return ((addr < memory_start) || ((addr + size) > memory_end)); } +EXPORT_SYMBOL(___range_ok); -- cgit v1.2.2 From 353b431bafc42ed8782c7aec7fb819ca4e385ab1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 1 May 2009 13:37:46 +0000 Subject: microblaze: fix __user annotations The microblaze signal handling code gets some __user pointers wrong, as shown by sparse. This adds the annotations where appropriate and change sys_rt_sigreturn to correctly pass a user stack down to do_sigaltstack instead of a kernel structure. Signed-off-by: Arnd Bergmann Signed-off-by: Michal Simek --- arch/microblaze/kernel/signal.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 40d36931e363..804a074a94a8 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -152,8 +152,8 @@ struct rt_sigframe { unsigned long tramp[2]; /* signal trampoline */ }; -static int -restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *rval_p) +static int restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc, int *rval_p) { unsigned int err = 0; @@ -211,11 +211,10 @@ badframe: asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) { - struct rt_sigframe *frame = - (struct rt_sigframe *)(regs->r1 + STATE_SAVE_ARG_SPACE); + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)(regs->r1 + STATE_SAVE_ARG_SPACE); sigset_t set; - stack_t st; int rval; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) @@ -233,11 +232,10 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &rval)) goto badframe; - if (__copy_from_user((void *)&st, &frame->uc.uc_stack, sizeof(st))) - goto badframe; /* It is more difficult to avoid calling this function than to call it and ignore errors. */ - do_sigaltstack(&st, NULL, regs->r1); + if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->r1)) + goto badframe; return rval; @@ -251,7 +249,7 @@ badframe: */ static int -setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, +setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask) { int err = 0; @@ -278,7 +276,7 @@ setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, /* * Determine which stack to use.. */ -static inline void * +static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) { /* Default to using normal stack */ @@ -287,7 +285,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && !on_sig_stack(sp)) sp = current->sas_ss_sp + current->sas_ss_size; - return (void *)((sp - frame_size) & -8UL); + return (void __user *)((sp - frame_size) & -8UL); } static void setup_frame(int sig, struct k_sigaction *ka, @@ -367,7 +365,7 @@ give_sigsegv: static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { - struct rt_sigframe *frame; + struct rt_sigframe __user *frame; int err = 0; int signal; -- cgit v1.2.2 From 70f4cc29350222ff6baf70265f6482fc01565d48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 10 May 2009 14:26:52 +0000 Subject: microblaze: kill incorrect __bad_xchg definition The whole point of the __bad_xchg declaration in system.h is to give a linker error when a variable of invalid size is passed to __xchg. The out of line definition in traps.c defeats this purpose and does not any value, so remove it here. Signed-off-by: Arnd Bergmann Signed-off-by: Michal Simek --- arch/microblaze/kernel/traps.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c index 293ef486013a..6322cc174474 100644 --- a/arch/microblaze/kernel/traps.c +++ b/arch/microblaze/kernel/traps.c @@ -22,14 +22,6 @@ void trap_init(void) __enable_hw_exceptions(); } -void __bad_xchg(volatile void *ptr, int size) -{ - printk(KERN_INFO "xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n", - __builtin_return_address(0), ptr, size); - BUG(); -} -EXPORT_SYMBOL(__bad_xchg); - static int kstack_depth_to_print = 24; static int __init kstack_setup(char *s) -- cgit v1.2.2 From 122eec2f023f25fdd491ee9eb8eface4ded70728 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 10 May 2009 14:32:11 +0000 Subject: microblaze: remove cacheable_memcpy This function is neither declared nor used anywhere outside of ppc32, so remove it from microblaze. Signed-off-by: Arnd Bergmann Signed-off-by: Michal Simek --- arch/microblaze/lib/memcpy.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/lib/memcpy.c b/arch/microblaze/lib/memcpy.c index 5880119c4487..6a907c58a4bc 100644 --- a/arch/microblaze/lib/memcpy.c +++ b/arch/microblaze/lib/memcpy.c @@ -154,8 +154,3 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c) } EXPORT_SYMBOL(memcpy); #endif /* __HAVE_ARCH_MEMCPY */ - -void *cacheable_memcpy(void *d, const void *s, __kernel_size_t c) -{ - return memcpy(d, s, c); -} -- cgit v1.2.2 From 6b4374261a067e2e1b78602eb5e631a4e027dacf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 1 May 2009 21:36:44 +0000 Subject: microblaze: add security initcalls The security subsystem has its own initcalls, which need support in vmlinux.lds.S. Signed-off-by: Arnd Bergmann Signed-off-by: Michal Simek --- arch/microblaze/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index 840385e51291..910f7c09b92a 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -132,6 +132,8 @@ SECTIONS { __con_initcall_end = .; } + SECURITY_INIT + __init_end_before_initramfs = .; .init.ramfs ALIGN(4096) : { -- cgit v1.2.2 From 0c60155e14eb00fa13836a710a2d2efb63d8861c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 1 May 2009 21:44:51 +0000 Subject: microblaze: add a dummy pgprot_noncached Some device drivers call this, so add a macro that pretends to do this. Since there is no MMU support, it won't actually result in an uncached mapping, though. Signed-off-by: Arnd Bergmann Signed-off-by: Michal Simek --- arch/microblaze/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 4df31e46568e..254fd4ba733b 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -27,6 +27,8 @@ #define PAGE_READONLY __pgprot(0) /* these mean nothing to non MMU */ #define PAGE_KERNEL __pgprot(0) /* these mean nothing to non MMU */ +#define pgprot_noncached(x) (x) + #define __swp_type(x) (0) #define __swp_offset(x) (0) #define __swp_entry(typ, off) ((swp_entry_t) { ((typ) | ((off) << 7)) }) -- cgit v1.2.2 From bb09791344d02c8caff596dc084b1542dcb70efe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Apr 2009 22:38:58 +0000 Subject: microblaze: do not include types.h in ptrace.h linux/types.h breaks the uclibc build, so don't include it here. Signed-off-by: Arnd Bergmann Signed-off-by: Michal Simek --- arch/microblaze/include/asm/ptrace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/ptrace.h b/arch/microblaze/include/asm/ptrace.h index 55015bce5e47..a917dc517736 100644 --- a/arch/microblaze/include/asm/ptrace.h +++ b/arch/microblaze/include/asm/ptrace.h @@ -10,7 +10,6 @@ #define _ASM_MICROBLAZE_PTRACE_H #ifndef __ASSEMBLY__ -#include typedef unsigned long microblaze_reg_t; -- cgit v1.2.2 From 838d2406ee62595c1b40d1d03b48bc9a2102258b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 1 May 2009 13:36:13 +0000 Subject: microblaze: remove bad_user_access_length This function was actually causing harm, by hiding errors about invalid sized get_user/put_user accesses. Signed-off-by: Arnd Bergmann Signed-off-by: Michal Simek --- arch/microblaze/include/asm/uaccess.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 5a3ffc308e12..a4e171d49d15 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -34,10 +34,9 @@ extern int ___range_ok(unsigned long addr, unsigned long size); #define access_ok(type, addr, size) (__range_ok((addr), (size)) == 0) #define __access_ok(add, size) (__range_ok((addr), (size)) == 0) -extern inline int bad_user_access_length(void) -{ - return 0; -} +/* Undefined function to trigger linker error */ +extern int bad_user_access_length(void); + /* FIXME this is function for optimalization -> memcpy */ #define __get_user(var, ptr) \ ({ \ -- cgit v1.2.2 From 732703af9c3478c3f935dd5ae80140b9b12bda09 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 31 Mar 2009 15:30:31 +0200 Subject: microblaze: clean up checksum.c This changes the function prototypes in the checksum code to have the usual prototypes, typically by turning int arguments into __wsum. Also change csum_partial_copy_from_user() to operate on the right address space and export ip_fast_csum, which is used in modular networking code. The new version is now sparse-clean including endianess checks. Signed-off-by: Arnd Bergmann Signed-off-by: Michal Simek --- arch/microblaze/include/asm/checksum.h | 14 ++++++++------ arch/microblaze/lib/checksum.c | 31 ++++++++++++++++++++----------- 2 files changed, 28 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/checksum.h b/arch/microblaze/include/asm/checksum.h index 92b30762ce59..97ea46b5cf80 100644 --- a/arch/microblaze/include/asm/checksum.h +++ b/arch/microblaze/include/asm/checksum.h @@ -51,7 +51,8 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum); * here even more important to align src and dst on a 32-bit (or even * better 64-bit) boundary */ -extern __wsum csum_partial_copy(const char *src, char *dst, int len, int sum); +extern __wsum csum_partial_copy(const void *src, void *dst, int len, + __wsum sum); /* * the same as csum_partial_copy, but copies from user space. @@ -59,8 +60,8 @@ extern __wsum csum_partial_copy(const char *src, char *dst, int len, int sum); * here even more important to align src and dst on a 32-bit (or even * better 64-bit) boundary */ -extern __wsum csum_partial_copy_from_user(const char *src, char *dst, - int len, int sum, int *csum_err); +extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *csum_err); #define csum_partial_copy_nocheck(src, dst, len, sum) \ csum_partial_copy((src), (dst), (len), (sum)) @@ -75,11 +76,12 @@ extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); /* * Fold a partial checksum */ -static inline __sum16 csum_fold(unsigned int sum) +static inline __sum16 csum_fold(__wsum csum) { + u32 sum = (__force u32)csum; sum = (sum & 0xffff) + (sum >> 16); sum = (sum & 0xffff) + (sum >> 16); - return ~sum; + return (__force __sum16)~sum; } static inline __sum16 @@ -93,6 +95,6 @@ csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, * this routine is used for miscellaneous IP-like checksums, mainly * in icmp.c */ -extern __sum16 ip_compute_csum(const unsigned char *buff, int len); +extern __sum16 ip_compute_csum(const void *buff, int len); #endif /* _ASM_MICROBLAZE_CHECKSUM_H */ diff --git a/arch/microblaze/lib/checksum.c b/arch/microblaze/lib/checksum.c index 809340070a13..f08e74591418 100644 --- a/arch/microblaze/lib/checksum.c +++ b/arch/microblaze/lib/checksum.c @@ -32,9 +32,10 @@ /* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access kills, so most of the assembly has to go. */ -#include -#include #include +#include + +#include static inline unsigned short from32to16(unsigned long x) { @@ -102,6 +103,7 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { return (__force __sum16)~do_csum(iph, ihl*4); } +EXPORT_SYMBOL(ip_fast_csum); /* * computes the checksum of a memory block at buff, length len, @@ -115,15 +117,16 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) * * it's best to have buff aligned on a 32-bit boundary */ -__wsum csum_partial(const void *buff, int len, __wsum sum) +__wsum csum_partial(const void *buff, int len, __wsum wsum) { + unsigned int sum = (__force unsigned int)wsum; unsigned int result = do_csum(buff, len); /* add in old sum, and carry.. */ result += sum; if (sum > result) result += 1; - return result; + return (__force __wsum)result; } EXPORT_SYMBOL(csum_partial); @@ -131,9 +134,9 @@ EXPORT_SYMBOL(csum_partial); * this routine is used for miscellaneous IP-like checksums, mainly * in icmp.c */ -__sum16 ip_compute_csum(const unsigned char *buff, int len) +__sum16 ip_compute_csum(const void *buff, int len) { - return ~do_csum(buff, len); + return (__force __sum16)~do_csum(buff, len); } EXPORT_SYMBOL(ip_compute_csum); @@ -141,12 +144,18 @@ EXPORT_SYMBOL(ip_compute_csum); * copy from fs while checksumming, otherwise like csum_partial */ __wsum -csum_partial_copy_from_user(const char __user *src, char *dst, int len, - int sum, int *csum_err) +csum_partial_copy_from_user(const void __user *src, void *dst, int len, + __wsum sum, int *csum_err) { - if (csum_err) + int missing; + + missing = __copy_from_user(dst, src, len); + if (missing) { + memset(dst + len - missing, 0, missing); + *csum_err = -EFAULT; + } else *csum_err = 0; - memcpy(dst, src, len); + return csum_partial(dst, len, sum); } EXPORT_SYMBOL(csum_partial_copy_from_user); @@ -155,7 +164,7 @@ EXPORT_SYMBOL(csum_partial_copy_from_user); * copy from ds while checksumming, otherwise like csum_partial */ __wsum -csum_partial_copy(const char *src, char *dst, int len, int sum) +csum_partial_copy(const void *src, void *dst, int len, __wsum sum) { memcpy(dst, src, len); return csum_partial(dst, len, sum); -- cgit v1.2.2 From a6029d1c8cfc9ac2609195f31c2e70b584d3496e Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 12 May 2009 12:10:52 +0200 Subject: microblaze: prepare signal handling for generic unistd.h We need to define set_restore_sigmask() in order to get pselect and ppoll. Also, the setup_frame function can not be used when __NR_sigreturn is not defined. Signed-off-by: Arnd Bergmann Signed-off-by: Michal Simek --- arch/microblaze/include/asm/syscalls.h | 3 ++ arch/microblaze/include/asm/thread_info.h | 11 +++++ arch/microblaze/kernel/signal.c | 79 ++----------------------------- 3 files changed, 17 insertions(+), 76 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/syscalls.h b/arch/microblaze/include/asm/syscalls.h index 9cb4ff0edeb2..ddea9eb31f8d 100644 --- a/arch/microblaze/include/asm/syscalls.h +++ b/arch/microblaze/include/asm/syscalls.h @@ -34,6 +34,9 @@ asmlinkage int sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, asmlinkage int sys_sigaction(int sig, const struct old_sigaction *act, struct old_sigaction *oact); +asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, + struct sigaction __user *oact, size_t sigsetsize); + asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs); diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index 4c3943e3f403..a0401d2ef8d5 100644 --- a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -154,6 +154,17 @@ static inline struct thread_info *current_thread_info(void) */ /* FPU was used by this task this quantum (SMP) */ #define TS_USEDFPU 0x0001 +#define TS_RESTORE_SIGMASK 0x0002 + +#ifndef __ASSEMBLY__ +#define HAVE_SET_RESTORE_SIGMASK 1 +static inline void set_restore_sigmask(void) +{ + struct thread_info *ti = current_thread_info(); + ti->status |= TS_RESTORE_SIGMASK; + set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags); +} +#endif #endif /* __KERNEL__ */ #endif /* _ASM_MICROBLAZE_THREAD_INFO_H */ diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 804a074a94a8..3cdcbfe41295 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -288,80 +288,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) return (void __user *)((sp - frame_size) & -8UL); } -static void setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs *regs) -{ - struct sigframe *frame; - int err = 0; - int signal; - - frame = get_sigframe(ka, regs, sizeof(*frame)); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; - - signal = current_thread_info()->exec_domain - && current_thread_info()->exec_domain->signal_invmap - && sig < 32 - ? current_thread_info()->exec_domain->signal_invmap[sig] - : sig; - - err |= setup_sigcontext(&frame->sc, regs, set->sig[0]); - - if (_NSIG_WORDS > 1) { - err |= __copy_to_user(frame->extramask, &set->sig[1], - sizeof(frame->extramask)); - } - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - /* minus 8 is offset to cater for "rtsd r15,8" offset */ - if (ka->sa.sa_flags & SA_RESTORER) { - regs->r15 = ((unsigned long)ka->sa.sa_restorer)-8; - } else { - /* Note, these encodings are _big endian_! */ - - /* addi r12, r0, __NR_sigreturn */ - err |= __put_user(0x31800000 | __NR_sigreturn , - frame->tramp + 0); - /* brki r14, 0x8 */ - err |= __put_user(0xb9cc0008, frame->tramp + 1); - - /* Return from sighandler will jump to the tramp. - Negative 8 offset because return is rtsd r15, 8 */ - regs->r15 = ((unsigned long)frame->tramp)-8; - - __invalidate_cache_sigtramp((unsigned long)frame->tramp); - } - - if (err) - goto give_sigsegv; - - /* Set up registers for signal handler */ - regs->r1 = (unsigned long) frame - STATE_SAVE_ARG_SPACE; - - /* Signal handler args: */ - regs->r5 = signal; /* Arg 0: signum */ - regs->r6 = (unsigned long) &frame->sc; /* arg 1: sigcontext */ - - /* Offset of 4 to handle microblaze rtid r14, 0 */ - regs->pc = (unsigned long)ka->sa.sa_handler; - - set_fs(USER_DS); - -#ifdef DEBUG_SIG - printk(KERN_INFO "SIG deliver (%s:%d): sp=%p pc=%08lx\n", - current->comm, current->pid, frame, regs->pc); -#endif - - return; - -give_sigsegv: - if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; - force_sig(SIGSEGV, current); -} - static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { @@ -380,7 +306,8 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, ? current_thread_info()->exec_domain->signal_invmap[sig] : sig; - err |= copy_siginfo_to_user(&frame->info, info); + if (info) + err |= copy_siginfo_to_user(&frame->info, info); /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); @@ -478,7 +405,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, if (ka->sa.sa_flags & SA_SIGINFO) setup_rt_frame(sig, ka, info, oldset, regs); else - setup_frame(sig, ka, oldset, regs); + setup_rt_frame(sig, ka, NULL, oldset, regs); if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; -- cgit v1.2.2 From f2224ff07f345f3f9716071cc90ee50e29af7497 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Apr 2009 17:38:25 +0200 Subject: microblaze: use generic dma-mapping-broken.h Microblaze does not support the Linux DMA mapping API at this point, so disable CONFIG_NO_DMA. This lets us use the generic dma-mapping-broken.h implementation instead of providing a different copy. Any drivers that try to use DMA mapping now get omitted from Kconfig or produce a link error, rather than failing silently at run time. Signed-off-by: Arnd Bergmann Signed-off-by: Michal Simek --- arch/microblaze/Kconfig | 2 +- arch/microblaze/include/asm/dma-mapping.h | 130 +----------------------------- 2 files changed, 2 insertions(+), 130 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index e6af7dd01fe6..f8e0722df0cf 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -59,7 +59,7 @@ config PCI config NO_DMA depends on !MMU - def_bool n + def_bool y source "init/Kconfig" diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h index 17336252a9b8..d00e40099165 100644 --- a/arch/microblaze/include/asm/dma-mapping.h +++ b/arch/microblaze/include/asm/dma-mapping.h @@ -1,129 +1 @@ -/* - * Copyright (C) 2006 Atmark Techno, Inc. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#ifndef _ASM_MICROBLAZE_DMA_MAPPING_H -#define _ASM_MICROBLAZE_DMA_MAPPING_H - -#include -#include -#include - -struct scatterlist; - -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) - -/* FIXME */ -static inline int -dma_supported(struct device *dev, u64 mask) -{ - return 1; -} - -static inline dma_addr_t -dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - BUG(); - return 0; -} - -static inline void -dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline int -dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - BUG(); - return 0; -} - -static inline void -dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline void -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline void -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) -{ - BUG(); -} - -static inline void -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline void -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return 0; -} - -static inline void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, int flag) -{ - return NULL; /* consistent_alloc(flag, size, dma_handle); */ -} - -static inline void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - BUG(); -} - -static inline dma_addr_t -dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); - - return virt_to_bus(ptr); -} - -static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, - enum dma_data_direction direction) -{ - switch (direction) { - case DMA_FROM_DEVICE: - flush_dcache_range((unsigned)dma_addr, - (unsigned)dma_addr + size); - /* Fall through */ - case DMA_TO_DEVICE: - break; - default: - BUG(); - } -} - -#endif /* _ASM_MICROBLAZE_DMA_MAPPING_H */ +#include -- cgit v1.2.2 From 5af90438023786e27178cc542f9a775594f8a126 Mon Sep 17 00:00:00 2001 From: Steve Magnani Date: Mon, 18 May 2009 03:22:40 +0200 Subject: microblaze: Fix paging init-zone initialization This patch fix problem with bad zone initialization. This bug wasn't perform because Microblaze doesn't define CONFIG_ZONE_DMA and ZONE_NORMAL was 0 for this case that's why free_area_init works with correct values. Original message: I believe that the switch from ZONE_DMA (== 0) to ZONE_NORMAL broke the free area initialization. Signed-off-by: Steven J. Magnani Signed-off-by: Michal Simek --- arch/microblaze/mm/init.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 31ec053c1dd6..6ef5088a8d2a 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -44,16 +44,15 @@ static void __init paging_init(void) int i; unsigned long zones_size[MAX_NR_ZONES]; + /* Clean every zones */ + memset(zones_size, 0, sizeof(zones_size)); + /* * old: we can DMA to/from any address.put all page into ZONE_DMA * We use only ZONE_NORMAL */ zones_size[ZONE_NORMAL] = max_mapnr; - /* every other zones are empty */ - for (i = 1; i < MAX_NR_ZONES; i++) - zones_size[i] = 0; - free_area_init(zones_size); } -- cgit v1.2.2 From c3055d14565516867e7a45553b90cb88d6277ec9 Mon Sep 17 00:00:00 2001 From: Thomas Chou Date: Tue, 19 May 2009 22:48:10 +0800 Subject: microblaze: clean LDFLAGS to build kernel Extra LDFLAGS from user space building may cause kernel failed to compile. Signed-off-by: Thomas Chou Signed-off-by: Michal Simek --- arch/microblaze/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index aaadfa701da3..ab731b75c23b 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -36,6 +36,8 @@ CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) # r31 holds current when in kernel mode CFLAGS_KERNEL += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2) +LDFLAGS := +LDFLAGS_vmlinux := LDFLAGS_BLOB := --format binary --oformat elf32-microblaze LIBGCC := $(shell $(CC) $(CFLAGS_KERNEL) -print-libgcc-file-name) -- cgit v1.2.2 From 6e3d4e1d16a19f8462beb5bfe5f2c464770e795e Mon Sep 17 00:00:00 2001 From: Steve Magnani Date: Sun, 17 May 2009 20:32:07 -0500 Subject: microblaze: Guard __HAVE_ARCH macros with __KERNEL__ in string.h A polarity reversal in the __KERNEL__ guard prevents the __HAVE_ARCH flags from being defined in kernel compilation. I noticed that there's now an option for assembly-optimized versions of memcpy and memmove. I believe this may be buggy; when I turn it on, all my printk output gets smashed together, as if the newlines aren't getting copied. Signed-off-by: Steven J. Magnani Signed-off-by: Michal Simek --- arch/microblaze/include/asm/string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/string.h b/arch/microblaze/include/asm/string.h index f7728c90fc18..aec2f59298b8 100644 --- a/arch/microblaze/include/asm/string.h +++ b/arch/microblaze/include/asm/string.h @@ -9,7 +9,7 @@ #ifndef _ASM_MICROBLAZE_STRING_H #define _ASM_MICROBLAZE_STRING_H -#ifndef __KERNEL__ +#ifdef __KERNEL__ #define __HAVE_ARCH_MEMSET #define __HAVE_ARCH_MEMCPY -- cgit v1.2.2 From 1dff89a9c7fab71b43ba79cc1aa6c6dbad582a35 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 21 May 2009 08:20:30 +0200 Subject: microblaze: Fix early cmdline for CMDLINE_FORCE This patch fixed parsing early parameters because current implementation does that early parse DTS command line and then parse CMDLINE line which is compiled-in. For case that DTS doesn't contain command line is copied command line from kernel with is done in prom.c that's why I can remove it from machine_early_init. Signed-off-by: Michal Simek --- arch/microblaze/kernel/prom.c | 2 ++ arch/microblaze/kernel/setup.c | 9 +-------- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 34c48718061a..003d37360623 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -563,7 +563,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node, strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE)); #ifdef CONFIG_CMDLINE +#ifndef CONFIG_CMDLINE_FORCE if (p == NULL || l == 0 || (l == 1 && (*p) == 0)) +#endif strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); #endif /* CONFIG_CMDLINE */ diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index eb6b41758e23..c156b16f9ee9 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -42,10 +42,6 @@ char cmd_line[COMMAND_LINE_SIZE]; void __init setup_arch(char **cmdline_p) { -#ifdef CONFIG_CMDLINE_FORCE - strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); - strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif *cmdline_p = cmd_line; console_verbose(); @@ -106,10 +102,7 @@ void __init machine_early_init(const char *cmdline, unsigned int ram, memset(__bss_start, 0, __bss_stop-__bss_start); memset(_ssbss, 0, _esbss-_ssbss); - /* - * Copy command line passed from bootloader, or use default - * if none provided, or forced - */ + /* Copy command line passed from bootloader */ #ifndef CONFIG_CMDLINE_BOOL if (cmdline && cmdline[0] != '\0') strlcpy(cmd_line, cmdline, COMMAND_LINE_SIZE); -- cgit v1.2.2 From cda1fd5a60e97a1a1bf96606f201818b207b2c5c Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 19 May 2009 14:33:47 +0200 Subject: microblaze: Cleanup compiled-in rootfs in BSS section This patch is based on patch from Steve Magnani. There were bug for compiled-in rootfs. We have to move moving rootfs which is in BSS section to _ebss section which is at the end of kernel and then clear bss section not vice-versa. Signed-off-by: Michal Simek --- arch/microblaze/kernel/setup.c | 53 +++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index c156b16f9ee9..8709bea09604 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -98,6 +98,29 @@ void __init machine_early_init(const char *cmdline, unsigned int ram, { unsigned long *src, *dst = (unsigned long *)0x0; + /* If CONFIG_MTD_UCLINUX is defined, assume ROMFS is at the + * end of kernel. There are two position which we want to check. + * The first is __init_end and the second __bss_start. + */ +#ifdef CONFIG_MTD_UCLINUX + int romfs_size; + unsigned int romfs_base; + char *old_klimit = klimit; + + romfs_base = (ram ? ram : (unsigned int)&__init_end); + romfs_size = PAGE_ALIGN(get_romfs_len((unsigned *)romfs_base)); + if (!romfs_size) { + romfs_base = (unsigned int)&__bss_start; + romfs_size = PAGE_ALIGN(get_romfs_len((unsigned *)romfs_base)); + } + + /* Move ROMFS out of BSS before clearing it */ + if (romfs_size > 0) { + memmove(&_ebss, (int *)romfs_base, romfs_size); + klimit += romfs_size; + } +#endif + /* clearing bss section */ memset(__bss_start, 0, __bss_stop-__bss_start); memset(_ssbss, 0, _esbss-_ssbss); @@ -119,27 +142,15 @@ void __init machine_early_init(const char *cmdline, unsigned int ram, printk(KERN_NOTICE "Found FDT at 0x%08x\n", fdt); #ifdef CONFIG_MTD_UCLINUX - { - int size; - unsigned int romfs_base; - romfs_base = (ram ? ram : (unsigned int)&__init_end); - /* if CONFIG_MTD_UCLINUX_EBSS is defined, assume ROMFS is at the - * end of kernel, which is ROMFS_LOCATION defined above. */ - size = PAGE_ALIGN(get_romfs_len((unsigned *)romfs_base)); - early_printk("Found romfs @ 0x%08x (0x%08x)\n", - romfs_base, size); - early_printk("#### klimit %p ####\n", klimit); - BUG_ON(size < 0); /* What else can we do? */ - - /* Use memmove to handle likely case of memory overlap */ - early_printk("Moving 0x%08x bytes from 0x%08x to 0x%08x\n", - size, romfs_base, (unsigned)&_ebss); - memmove(&_ebss, (int *)romfs_base, size); - - /* update klimit */ - klimit += PAGE_ALIGN(size); - early_printk("New klimit: 0x%08x\n", (unsigned)klimit); - } + early_printk("Found romfs @ 0x%08x (0x%08x)\n", + romfs_base, romfs_size); + early_printk("#### klimit %p ####\n", old_klimit); + BUG_ON(romfs_size < 0); /* What else can we do? */ + + early_printk("Moved 0x%08x bytes from 0x%08x to 0x%08x\n", + romfs_size, romfs_base, (unsigned)&_ebss); + + early_printk("New klimit: 0x%08x\n", (unsigned)klimit); #endif for (src = __ivt_start; src < __ivt_end; src++, dst++) -- cgit v1.2.2 From e93b55bfb3ead1cb4d9df7a5e0686318cfa8690c Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 21 May 2009 13:53:56 +0200 Subject: Microblaze: Remove unused variable from paging init Signed-off-by: Michal Simek --- arch/microblaze/mm/init.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 6ef5088a8d2a..af789e26a7ce 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -41,7 +41,6 @@ unsigned int memory_size; */ static void __init paging_init(void) { - int i; unsigned long zones_size[MAX_NR_ZONES]; /* Clean every zones */ -- cgit v1.2.2 From b9479e666563d5c28eb861d2a8f6334666025384 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 21 May 2009 16:33:07 +0200 Subject: microblaze: Fix cast warning for __va in prom.c __va expect 32bit value but of_read_ulong(of_read_number) returns 64bit value Signed-off-by: Michal Simek --- arch/microblaze/kernel/prom.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 003d37360623..c005cc6f1aaf 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -509,12 +509,13 @@ static void __init early_init_dt_check_for_initrd(unsigned long node) prop = of_get_flat_dt_prop(node, "linux,initrd-start", &l); if (prop) { - initrd_start = (unsigned long)__va(of_read_ulong(prop, l/4)); + initrd_start = (unsigned long) + __va((u32)of_read_ulong(prop, l/4)); prop = of_get_flat_dt_prop(node, "linux,initrd-end", &l); if (prop) { initrd_end = (unsigned long) - __va(of_read_ulong(prop, l/4)); + __va((u32)of_read_ulong(prop, 1/4)); initrd_below_start_ok = 1; } else { initrd_start = 0; -- cgit v1.2.2 From aaf22af46ff2b3f0a5c4fcaf1287ac6c28d4078e Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 21 May 2009 17:27:58 +0200 Subject: microblaze: Kbuild update Signed-off-by: Michal Simek --- arch/microblaze/include/asm/Kbuild | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 31820dfef56b..db5294c30caf 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -1,26 +1,3 @@ include include/asm-generic/Kbuild.asm -header-y += auxvec.h -header-y += errno.h -header-y += fcntl.h -header-y += ioctl.h -header-y += ioctls.h -header-y += ipcbuf.h -header-y += linkage.h -header-y += msgbuf.h -header-y += poll.h -header-y += resource.h -header-y += sembuf.h -header-y += shmbuf.h -header-y += sigcontext.h -header-y += siginfo.h -header-y += socket.h -header-y += sockios.h -header-y += statfs.h -header-y += stat.h -header-y += termbits.h -header-y += ucontext.h - -unifdef-y += cputable.h -unifdef-y += elf.h -unifdef-y += termios.h +header-y += elf.h -- cgit v1.2.2 From 0bc53a67ac831ec84f730a657dbcadd80a589ef5 Mon Sep 17 00:00:00 2001 From: Jon Smirl Date: Sat, 23 May 2009 19:13:03 -0400 Subject: ASoC: Add a few more mpc5200 PSC defines Add a few more mpc5200 PSC defines. More bit fields defines for mpc5200 PSC registers. Signed-off-by: Jon Smirl Acked-by: Grant Likely Signed-off-by: Mark Brown --- arch/powerpc/include/asm/mpc52xx_psc.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/mpc52xx_psc.h b/arch/powerpc/include/asm/mpc52xx_psc.h index a218da6bec7c..fb8412057450 100644 --- a/arch/powerpc/include/asm/mpc52xx_psc.h +++ b/arch/powerpc/include/asm/mpc52xx_psc.h @@ -28,6 +28,10 @@ #define MPC52xx_PSC_MAXNUM 6 /* Programmable Serial Controller (PSC) status register bits */ +#define MPC52xx_PSC_SR_UNEX_RX 0x0001 +#define MPC52xx_PSC_SR_DATA_VAL 0x0002 +#define MPC52xx_PSC_SR_DATA_OVR 0x0004 +#define MPC52xx_PSC_SR_CMDSEND 0x0008 #define MPC52xx_PSC_SR_CDE 0x0080 #define MPC52xx_PSC_SR_RXRDY 0x0100 #define MPC52xx_PSC_SR_RXFULL 0x0200 @@ -61,6 +65,12 @@ #define MPC52xx_PSC_RXTX_FIFO_EMPTY 0x0001 /* PSC interrupt status/mask bits */ +#define MPC52xx_PSC_IMR_UNEX_RX_SLOT 0x0001 +#define MPC52xx_PSC_IMR_DATA_VALID 0x0002 +#define MPC52xx_PSC_IMR_DATA_OVR 0x0004 +#define MPC52xx_PSC_IMR_CMD_SEND 0x0008 +#define MPC52xx_PSC_IMR_ERROR 0x0040 +#define MPC52xx_PSC_IMR_DEOF 0x0080 #define MPC52xx_PSC_IMR_TXRDY 0x0100 #define MPC52xx_PSC_IMR_RXRDY 0x0200 #define MPC52xx_PSC_IMR_DB 0x0400 @@ -117,6 +127,7 @@ #define MPC52xx_PSC_SICR_SIM_FIR (0x6 << 24) #define MPC52xx_PSC_SICR_SIM_CODEC_24 (0x7 << 24) #define MPC52xx_PSC_SICR_SIM_CODEC_32 (0xf << 24) +#define MPC52xx_PSC_SICR_AWR (1 << 30) #define MPC52xx_PSC_SICR_GENCLK (1 << 23) #define MPC52xx_PSC_SICR_I2S (1 << 22) #define MPC52xx_PSC_SICR_CLKPOL (1 << 21) -- cgit v1.2.2 From 0945f98b4a844b488d4e42a43f90a3c3aef281af Mon Sep 17 00:00:00 2001 From: "Edgar E. Iglesias" Date: Sun, 10 May 2009 14:39:05 +0200 Subject: microblaze: Add audit and seccomp thread flags. Signed-off-by: Edgar E. Iglesias --- arch/microblaze/include/asm/thread_info.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index a0401d2ef8d5..7fac44498445 100644 --- a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -122,6 +122,8 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SINGLESTEP 4 #define TIF_IRET 5 /* return with iret */ #define TIF_MEMDIE 6 +#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ +#define TIF_SECCOMP 10 /* secure computing */ #define TIF_FREEZE 14 /* Freezing for suspend */ /* FIXME change in entry.S */ @@ -138,10 +140,17 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_IRET (1< Date: Tue, 26 May 2009 10:14:49 +0200 Subject: microblaze: Fix size of __kernel_mode_t to short This patches solve problem with inconsistency between kernel and glibc Signed-off-by: Michal Simek --- arch/microblaze/include/asm/posix_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/posix_types.h b/arch/microblaze/include/asm/posix_types.h index b4df41c5dde2..8c758b231f37 100644 --- a/arch/microblaze/include/asm/posix_types.h +++ b/arch/microblaze/include/asm/posix_types.h @@ -16,7 +16,7 @@ */ typedef unsigned long __kernel_ino_t; -typedef unsigned int __kernel_mode_t; +typedef unsigned short __kernel_mode_t; typedef unsigned int __kernel_nlink_t; typedef long __kernel_off_t; typedef int __kernel_pid_t; -- cgit v1.2.2 From 2c65b4665f3f1651a7ef652d86eeb23be95dcdb9 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:07 +0200 Subject: microblaze_mmu_v2: Add mmu_defconfig Signed-off-by: Michal Simek --- arch/microblaze/configs/mmu_defconfig | 798 ++++++++++++++++++++++++++++++++++ 1 file changed, 798 insertions(+) create mode 100644 arch/microblaze/configs/mmu_defconfig (limited to 'arch') diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig new file mode 100644 index 000000000000..bd0b85ec38f5 --- /dev/null +++ b/arch/microblaze/configs/mmu_defconfig @@ -0,0 +1,798 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.30-rc6 +# Fri May 22 10:02:33 2009 +# +CONFIG_MICROBLAZE=y +# CONFIG_SWAP is not set +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_TIME=y +# CONFIG_GENERIC_TIME_VSYSCALL is not set +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_GENERIC_GPIO=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set + +# +# RCU Subsystem +# +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=17 +# CONFIG_GROUP_SCHED is not set +# CONFIG_CGROUPS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +# CONFIG_RELAY is not set +# CONFIG_NAMESPACES is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="rootfs.cpio" +CONFIG_INITRAMFS_ROOT_UID=0 +CONFIG_INITRAMFS_ROOT_GID=0 +CONFIG_RD_GZIP=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +CONFIG_INITRAMFS_COMPRESSION_NONE=y +# CONFIG_INITRAMFS_COMPRESSION_GZIP is not set +# CONFIG_INITRAMFS_COMPRESSION_BZIP2 is not set +# CONFIG_INITRAMFS_COMPRESSION_LZMA is not set +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +CONFIG_EMBEDDED=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_EXTRA_PASS=y +# CONFIG_STRIP_ASM_SYMS is not set +# CONFIG_HOTPLUG is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +# CONFIG_BASE_FULL is not set +# CONFIG_FUTEX is not set +# CONFIG_EPOLL is not set +# CONFIG_SIGNALFD is not set +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +# CONFIG_SHMEM is not set +CONFIG_AIO=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_COMPAT_BRK=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set +# CONFIG_SLOW_WORK is not set +# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set +CONFIG_SLABINFO=y +CONFIG_BASE_SMALL=1 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_BLOCK=y +# CONFIG_LBD is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="cfq" +# CONFIG_FREEZER is not set + +# +# Platform options +# +CONFIG_PLATFORM_GENERIC=y +CONFIG_OPT_LIB_FUNCTION=y +CONFIG_OPT_LIB_ASM=y +CONFIG_ALLOW_EDIT_AUTO=y + +# +# Automatic platform settings from Kconfig.auto +# + +# +# Definitions for MICROBLAZE0 +# +CONFIG_KERNEL_BASE_ADDR=0x90000000 +CONFIG_XILINX_MICROBLAZE0_FAMILY="virtex5" +CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1 +CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1 +CONFIG_XILINX_MICROBLAZE0_USE_BARREL=1 +CONFIG_XILINX_MICROBLAZE0_USE_DIV=1 +CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL=2 +CONFIG_XILINX_MICROBLAZE0_USE_FPU=2 +CONFIG_XILINX_MICROBLAZE0_HW_VER="7.10.d" + +# +# Processor type and features +# +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=100 +# CONFIG_SCHED_HRTICK is not set +CONFIG_MMU=y + +# +# Boot options +# +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyUL0,115200" +CONFIG_CMDLINE_FORCE=y +CONFIG_OF=y +CONFIG_PROC_DEVICETREE=y + +# +# Advanced setup +# +# CONFIG_ADVANCED_OPTIONS is not set + +# +# Default settings for advanced configuration options are used +# +CONFIG_HIGHMEM_START=0xfe000000 +CONFIG_LOWMEM_SIZE=0x30000000 +CONFIG_KERNEL_START=0xc0000000 +CONFIG_TASK_SIZE=0x80000000 +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y + +# +# Exectuable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_HAVE_AOUT is not set +# CONFIG_BINFMT_MISC is not set +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_NET_SCHED is not set +# CONFIG_DCB is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +# CONFIG_MTD is not set +CONFIG_OF_DEVICE=y +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_COW_COMMON is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=8192 +# CONFIG_BLK_DEV_XIP is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_XILINX_SYSACE is not set +CONFIG_MISC_DEVICES=y +# CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +# CONFIG_EEPROM_93CX6 is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set +CONFIG_NETDEVICES=y +CONFIG_COMPAT_NET_DEV_OPS=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_VETH is not set +# CONFIG_PHYLIB is not set +CONFIG_NET_ETHERNET=y +# CONFIG_MII is not set +# CONFIG_ETHOC is not set +# CONFIG_DNET is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set +# CONFIG_B44 is not set +CONFIG_NETDEV_1000=y +CONFIG_NETDEV_10000=y + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# +# CONFIG_WAN is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +# CONFIG_INPUT is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +CONFIG_DEVKMEM=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_UARTLITE=y +CONFIG_SERIAL_UARTLITE_CONSOLE=y +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_XILINX_HWICAP is not set +# CONFIG_R3964 is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +# CONFIG_I2C is not set +# CONFIG_SPI is not set +CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y +# CONFIG_GPIOLIB is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +# CONFIG_THERMAL_HWMON is not set +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_REGULATOR is not set + +# +# Multimedia devices +# + +# +# Multimedia core support +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +# CONFIG_VIDEO_MEDIA is not set + +# +# Multimedia drivers +# +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +# CONFIG_FB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_SOUND is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_RTC_CLASS is not set +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +# CONFIG_UIO is not set +# CONFIG_STAGING is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT3_FS is not set +# CONFIG_EXT4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +CONFIG_FILE_LOCKING=y +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +# CONFIG_PROC_KCORE is not set +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFSD is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +CONFIG_CIFS=y +CONFIG_CIFS_STATS=y +CONFIG_CIFS_STATS2=y +# CONFIG_CIFS_WEAK_PW_HASH is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_DEBUG2 is not set +# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +# CONFIG_EFI_PARTITION is not set +# CONFIG_SYSV68_PARTITION is not set +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set +# CONFIG_DLM is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 +CONFIG_SCHED_DEBUG=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +# CONFIG_DEBUG_OBJECTS is not set +CONFIG_DEBUG_SLAB=y +# CONFIG_DEBUG_SLAB_LEAK is not set +CONFIG_DEBUG_SPINLOCK=y +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set +# CONFIG_PAGE_POISONING is not set +# CONFIG_SAMPLES is not set +CONFIG_EARLY_PRINTK=y +CONFIG_HEART_BEAT=y +CONFIG_DEBUG_BOOTMEM=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_FIPS is not set +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_MANAGER2 is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +# CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_ZLIB is not set +# CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_DECOMPRESS_GZIP=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_HAVE_LMB=y +CONFIG_NLATTR=y -- cgit v1.2.2 From 5846cc608fd42cd3645ff9f841888832c6ef9b6e Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:09 +0200 Subject: microblaze_mmu_v2: MMU update for startup code Signed-off-by: Michal Simek --- arch/microblaze/kernel/head.S | 190 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S index 319dc35fc922..e568d6ec621b 100644 --- a/arch/microblaze/kernel/head.S +++ b/arch/microblaze/kernel/head.S @@ -3,6 +3,26 @@ * Copyright (C) 2007-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * + * MMU code derived from arch/ppc/kernel/head_4xx.S: + * Copyright (c) 1995-1996 Gary Thomas + * Initial PowerPC version. + * Copyright (c) 1996 Cort Dougan + * Rewritten for PReP + * Copyright (c) 1996 Paul Mackerras + * Low-level exception handers, MMU support, and rewrite. + * Copyright (c) 1997 Dan Malek + * PowerPC 8xx modifications. + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson + * PowerPC 403GCX/405GP modifications. + * Copyright 2000 MontaVista Software Inc. + * PPC405 modifications + * PowerPC 403GCX/405GP modifications. + * Author: MontaVista Software, Inc. + * frank_rowand@mvista.com or source@mvista.com + * debbie_chu@mvista.com + * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. @@ -12,6 +32,22 @@ #include #include +#ifdef CONFIG_MMU +#include /* COMMAND_LINE_SIZE */ +#include +#include + +.data +.global empty_zero_page +.align 12 +empty_zero_page: + .space 4096 +.global swapper_pg_dir +swapper_pg_dir: + .space 4096 + +#endif /* CONFIG_MMU */ + .text ENTRY(_start) mfs r1, rmsr @@ -32,6 +68,123 @@ _copy_fdt: addik r3, r3, -4 /* descrement loop */ no_fdt_arg: +#ifdef CONFIG_MMU + +#ifndef CONFIG_CMDLINE_BOOL +/* + * handling command line + * copy command line to __init_end. There is space for storing command line. + */ + or r6, r0, r0 /* incremment */ + ori r4, r0, __init_end /* load address of command line */ + tophys(r4,r4) /* convert to phys address */ + ori r3, r0, COMMAND_LINE_SIZE - 1 /* number of loops */ +_copy_command_line: + lbu r7, r5, r6 /* r7=r5+r6 - r5 contain pointer to command line */ + sb r7, r4, r6 /* addr[r4+r6]= r7*/ + addik r6, r6, 1 /* increment counting */ + bgtid r3, _copy_command_line /* loop for all entries */ + addik r3, r3, -1 /* descrement loop */ + addik r5, r4, 0 /* add new space for command line */ + tovirt(r5,r5) +#endif /* CONFIG_CMDLINE_BOOL */ + +#ifdef NOT_COMPILE +/* save bram context */ + or r6, r0, r0 /* incremment */ + ori r4, r0, TOPHYS(_bram_load_start) /* save bram context */ + ori r3, r0, (LMB_SIZE - 4) +_copy_bram: + lw r7, r0, r6 /* r7 = r0 + r6 */ + sw r7, r4, r6 /* addr[r4 + r6] = r7*/ + addik r6, r6, 4 /* increment counting */ + bgtid r3, _copy_bram /* loop for all entries */ + addik r3, r3, -4 /* descrement loop */ +#endif + /* We have to turn on the MMU right away. */ + + /* + * Set up the initial MMU state so we can do the first level of + * kernel initialization. This maps the first 16 MBytes of memory 1:1 + * virtual to physical. + */ + nop + addik r3, r0, 63 /* Invalidate all TLB entries */ +_invalidate: + mts rtlbx, r3 + mts rtlbhi, r0 /* flush: ensure V is clear */ + bgtid r3, _invalidate /* loop for all entries */ + addik r3, r3, -1 + /* sync */ + + /* + * We should still be executing code at physical address area + * RAM_BASEADDR at this point. However, kernel code is at + * a virtual address. So, set up a TLB mapping to cover this once + * translation is enabled. + */ + + addik r3,r0, CONFIG_KERNEL_START /* Load the kernel virtual address */ + tophys(r4,r3) /* Load the kernel physical address */ + + mts rpid,r0 /* Load the kernel PID */ + nop + bri 4 + + /* + * Configure and load two entries into TLB slots 0 and 1. + * In case we are pinning TLBs, these are reserved in by the + * other TLB functions. If not reserving, then it doesn't + * matter where they are loaded. + */ + andi r4,r4,0xfffffc00 /* Mask off the real page number */ + ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ + + andi r3,r3,0xfffffc00 /* Mask off the effective page number */ + ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M)) + + mts rtlbx,r0 /* TLB slow 0 */ + + mts rtlblo,r4 /* Load the data portion of the entry */ + mts rtlbhi,r3 /* Load the tag portion of the entry */ + + addik r4, r4, 0x01000000 /* Map next 16 M entries */ + addik r3, r3, 0x01000000 + + ori r6,r0,1 /* TLB slot 1 */ + mts rtlbx,r6 + + mts rtlblo,r4 /* Load the data portion of the entry */ + mts rtlbhi,r3 /* Load the tag portion of the entry */ + + /* + * Load a TLB entry for LMB, since we need access to + * the exception vectors, using a 4k real==virtual mapping. + */ + ori r6,r0,3 /* TLB slot 3 */ + mts rtlbx,r6 + + ori r4,r0,(TLB_WR | TLB_EX) + ori r3,r0,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K)) + + mts rtlblo,r4 /* Load the data portion of the entry */ + mts rtlbhi,r3 /* Load the tag portion of the entry */ + + /* + * We now have the lower 16 Meg of RAM mapped into TLB entries, and the + * caches ready to work. + */ +turn_on_mmu: + ori r15,r0,start_here + ori r4,r0,MSR_KERNEL_VMS + mts rmsr,r4 + nop + rted r15,0 /* enables MMU */ + nop + +start_here: +#endif /* CONFIG_MMU */ + /* Initialize small data anchors */ la r13, r0, _KERNEL_SDA_BASE_ la r2, r0, _KERNEL_SDA2_BASE_ @@ -51,6 +204,43 @@ no_fdt_arg: brald r15, r8 nop +#ifndef CONFIG_MMU la r15, r0, machine_halt braid start_kernel nop +#else + /* + * Initialize the MMU. + */ + bralid r15, mmu_init + nop + + /* Go back to running unmapped so we can load up new values + * and change to using our exception vectors. + * On the MicroBlaze, all we invalidate the used TLB entries to clear + * the old 16M byte TLB mappings. + */ + ori r15,r0,TOPHYS(kernel_load_context) + ori r4,r0,MSR_KERNEL + mts rmsr,r4 + nop + bri 4 + rted r15,0 + nop + + /* Load up the kernel context */ +kernel_load_context: + # Keep entry 0 and 1 valid. Entry 3 mapped to LMB can go away. + ori r5,r0,3 + mts rtlbx,r5 + nop + mts rtlbhi,r0 + nop + addi r15, r0, machine_halt + ori r17, r0, start_kernel + ori r4, r0, MSR_KERNEL_VMS + mts rmsr, r4 + nop + rted r17, 0 /* enable MMU and jump to start_kernel */ + nop +#endif /* CONFIG_MMU */ -- cgit v1.2.2 From a43acfbbc8653f70b8da4c64ec534fb45065a2ee Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:10 +0200 Subject: microblaze_mmu_v2: Alocate TLB for early console Signed-off-by: Michal Simek --- arch/microblaze/include/asm/setup.h | 10 ++++++++-- arch/microblaze/kernel/early_printk.c | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index 9b98e8e6abae..27f8dafd8c34 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -1,5 +1,6 @@ /* - * Copyright (C) 2007-2008 Michal Simek + * Copyright (C) 2007-2009 Michal Simek + * Copyright (C) 2007-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -18,7 +19,6 @@ extern unsigned int boot_cpuid; /* move to smp.h */ extern char cmd_line[COMMAND_LINE_SIZE]; -# endif/* __KERNEL__ */ void early_printk(const char *fmt, ...); @@ -30,6 +30,11 @@ void setup_heartbeat(void); unsigned long long sched_clock(void); +# ifdef CONFIG_MMU +extern void mmu_reset(void); +extern void early_console_reg_tlb_alloc(unsigned int addr); +# endif /* CONFIG_MMU */ + void time_init(void); void init_IRQ(void); void machine_early_init(const char *cmdline, unsigned int ram, @@ -40,5 +45,6 @@ void machine_shutdown(void); void machine_halt(void); void machine_power_off(void); +# endif/* __KERNEL__ */ # endif /* __ASSEMBLY__ */ #endif /* _ASM_MICROBLAZE_SETUP_H */ diff --git a/arch/microblaze/kernel/early_printk.c b/arch/microblaze/kernel/early_printk.c index 4b0f0fdb9ca0..7de84923ba07 100644 --- a/arch/microblaze/kernel/early_printk.c +++ b/arch/microblaze/kernel/early_printk.c @@ -87,6 +87,9 @@ int __init setup_early_printk(char *opt) base_addr = early_uartlite_console(); if (base_addr) { early_console_initialized = 1; +#ifdef CONFIG_MMU + early_console_reg_tlb_alloc(base_addr); +#endif early_printk("early_printk_console is enabled at 0x%08x\n", base_addr); -- cgit v1.2.2 From 3f50425c0c9815411b760e36b48e18958819a304 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:10 +0200 Subject: microblaze_mmu_v2: TLB low level code Signed-off-by: Michal Simek --- arch/microblaze/kernel/misc.S | 120 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 arch/microblaze/kernel/misc.S (limited to 'arch') diff --git a/arch/microblaze/kernel/misc.S b/arch/microblaze/kernel/misc.S new file mode 100644 index 000000000000..d623efc9083c --- /dev/null +++ b/arch/microblaze/kernel/misc.S @@ -0,0 +1,120 @@ +/* + * Miscellaneous low-level MMU functions. + * + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix + * Copyright (C) 2007 Xilinx, Inc. All rights reserved. + * + * Derived from arch/ppc/kernel/misc.S + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of this + * archive for more details. + */ + +#include +#include +#include +#include +#include +#include + + .text +/* + * Flush MMU TLB + * + * We avoid flushing the pinned 0, 1 and possibly 2 entries. + */ +.globl _tlbia; +.align 4; +_tlbia: + addik r12, r0, 63 /* flush all entries (63 - 3) */ + /* isync */ +_tlbia_1: + mts rtlbx, r12 + nop + mts rtlbhi, r0 /* flush: ensure V is clear */ + nop + addik r11, r12, -2 + bneid r11, _tlbia_1 /* loop for all entries */ + addik r12, r12, -1 + /* sync */ + rtsd r15, 8 + nop + +/* + * Flush MMU TLB for a particular address (in r5) + */ +.globl _tlbie; +.align 4; +_tlbie: + mts rtlbsx, r5 /* look up the address in TLB */ + nop + mfs r12, rtlbx /* Retrieve index */ + nop + blti r12, _tlbie_1 /* Check if found */ + mts rtlbhi, r0 /* flush: ensure V is clear */ + nop +_tlbie_1: + rtsd r15, 8 + nop + +/* + * Allocate TLB entry for early console + */ +.globl early_console_reg_tlb_alloc; +.align 4; +early_console_reg_tlb_alloc: + /* + * Load a TLB entry for the UART, so that microblaze_progress() can use + * the UARTs nice and early. We use a 4k real==virtual mapping. + */ + ori r4, r0, 2 + mts rtlbx, r4 /* TLB slot 2 */ + + or r4,r5,r0 + andi r4,r4,0xfffff000 + ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G) + + andi r5,r5,0xfffff000 + ori r5,r5,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K)) + + mts rtlblo,r4 /* Load the data portion of the entry */ + nop + mts rtlbhi,r5 /* Load the tag portion of the entry */ + nop + rtsd r15, 8 + nop + +/* + * Copy a whole page (4096 bytes). + */ +#define COPY_16_BYTES \ + lwi r7, r6, 0; \ + lwi r8, r6, 4; \ + lwi r9, r6, 8; \ + lwi r10, r6, 12; \ + swi r7, r5, 0; \ + swi r8, r5, 4; \ + swi r9, r5, 8; \ + swi r10, r5, 12 + + +/* FIXME DCACHE_LINE_BYTES (CONFIG_XILINX_MICROBLAZE0_DCACHE_LINE_LEN * 4)*/ +#define DCACHE_LINE_BYTES (4 * 4) + +.globl copy_page; +.align 4; +copy_page: + ori r11, r0, (PAGE_SIZE/DCACHE_LINE_BYTES) - 1 +_copy_page_loop: + COPY_16_BYTES +#if DCACHE_LINE_BYTES >= 32 + COPY_16_BYTES +#endif + addik r6, r6, DCACHE_LINE_BYTES + addik r5, r5, DCACHE_LINE_BYTES + bneid r11, _copy_page_loop + addik r11, r11, -1 + rtsd r15, 8 + nop -- cgit v1.2.2 From 4dc60832f516c4ccfd1e6aa07d92cc0f6d21bacb Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:12 +0200 Subject: microblaze_mmu_v2: MMU initialization Signed-off-by: Michal Simek --- arch/microblaze/mm/init.c | 158 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 154 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index af789e26a7ce..b5a701cd71e0 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -23,18 +23,26 @@ #include #include +#ifndef CONFIG_MMU unsigned int __page_offset; EXPORT_SYMBOL(__page_offset); +#else +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +int mem_init_done; +static int init_bootmem_done; +#endif /* CONFIG_MMU */ + char *klimit = _end; /* * Initialize the bootmem system and give it all the memory we * have available. */ -unsigned int memory_start; -unsigned int memory_end; /* due to mm/nommu.c */ -unsigned int memory_size; +unsigned long memory_start; +unsigned long memory_end; /* due to mm/nommu.c */ +unsigned long memory_size; /* * paging_init() sets up the page tables - in fact we've already done this. @@ -59,6 +67,7 @@ void __init setup_memory(void) { int i; unsigned long map_size; +#ifndef CONFIG_MMU u32 kernel_align_start, kernel_align_size; /* Find main memory where is the kernel */ @@ -91,6 +100,7 @@ void __init setup_memory(void) __func__, kernel_align_start, kernel_align_start + kernel_align_size, kernel_align_size); +#endif /* * Kernel: * start: base phys address of kernel - page align @@ -119,9 +129,13 @@ void __init setup_memory(void) * for 4GB of memory, using 4kB pages), plus 1 page * (in case the address isn't page-aligned). */ +#ifndef CONFIG_MMU map_size = init_bootmem_node(NODE_DATA(0), PFN_UP(TOPHYS((u32)_end)), min_low_pfn, max_low_pfn); - +#else + map_size = init_bootmem_node(&contig_page_data, + PFN_UP(TOPHYS((u32)_end)), min_low_pfn, max_low_pfn); +#endif lmb_reserve(PFN_UP(TOPHYS((u32)_end)) << PAGE_SHIFT, map_size); /* free bootmem is whole main memory */ @@ -135,6 +149,9 @@ void __init setup_memory(void) reserve_bootmem(lmb.reserved.region[i].base, lmb_size_bytes(&lmb.reserved, i) - 1, BOOTMEM_DEFAULT); } +#ifdef CONFIG_MMU + init_bootmem_done = 1; +#endif paging_init(); } @@ -189,8 +206,12 @@ void __init mem_init(void) printk(KERN_INFO "Memory: %luk/%luk available\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), num_physpages << (PAGE_SHIFT-10)); +#ifdef CONFIG_MMU + mem_init_done = 1; +#endif } +#ifndef CONFIG_MMU /* Check against bounds of physical memory */ int ___range_ok(unsigned long addr, unsigned long size) { @@ -198,3 +219,132 @@ int ___range_ok(unsigned long addr, unsigned long size) ((addr + size) > memory_end)); } EXPORT_SYMBOL(___range_ok); + +#else +int page_is_ram(unsigned long pfn) +{ + return pfn < max_low_pfn; +} + +/* + * Check for command-line options that affect what MMU_init will do. + */ +static void mm_cmdline_setup(void) +{ + unsigned long maxmem = 0; + char *p = cmd_line; + + /* Look for mem= option on command line */ + p = strstr(cmd_line, "mem="); + if (p) { + p += 4; + maxmem = memparse(p, &p); + if (maxmem && memory_size > maxmem) { + memory_size = maxmem; + memory_end = memory_start + memory_size; + lmb.memory.region[0].size = memory_size; + } + } +} + +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +static void __init mmu_init_hw(void) +{ + /* + * The Zone Protection Register (ZPR) defines how protection will + * be applied to every page which is a member of a given zone. At + * present, we utilize only two of the zones. + * The zone index bits (of ZSEL) in the PTE are used for software + * indicators, except the LSB. For user access, zone 1 is used, + * for kernel access, zone 0 is used. We set all but zone 1 + * to zero, allowing only kernel access as indicated in the PTE. + * For zone 1, we set a 01 binary (a value of 10 will not work) + * to allow user access as indicated in the PTE. This also allows + * kernel access as indicated in the PTE. + */ + __asm__ __volatile__ ("ori r11, r0, 0x10000000;" \ + "mts rzpr, r11;" + : : : "r11"); +} + +/* + * MMU_init sets up the basic memory mappings for the kernel, + * including both RAM and possibly some I/O regions, + * and sets up the page tables and the MMU hardware ready to go. + */ + +/* called from head.S */ +asmlinkage void __init mmu_init(void) +{ + unsigned int kstart, ksize; + + if (!lmb.reserved.cnt) { + printk(KERN_EMERG "Error memory count\n"); + machine_restart(NULL); + } + + if ((u32) lmb.memory.region[0].size < 0x1000000) { + printk(KERN_EMERG "Memory must be greater than 16MB\n"); + machine_restart(NULL); + } + /* Find main memory where the kernel is */ + memory_start = (u32) lmb.memory.region[0].base; + memory_end = (u32) lmb.memory.region[0].base + + (u32) lmb.memory.region[0].size; + memory_size = memory_end - memory_start; + + mm_cmdline_setup(); /* FIXME parse args from command line - not used */ + + /* + * Map out the kernel text/data/bss from the available physical + * memory. + */ + kstart = __pa(CONFIG_KERNEL_START); /* kernel start */ + /* kernel size */ + ksize = PAGE_ALIGN(((u32)_end - (u32)CONFIG_KERNEL_START)); + lmb_reserve(kstart, ksize); + +#if defined(CONFIG_BLK_DEV_INITRD) + /* Remove the init RAM disk from the available memory. */ +/* if (initrd_start) { + mem_pieces_remove(&phys_avail, __pa(initrd_start), + initrd_end - initrd_start, 1); + }*/ +#endif /* CONFIG_BLK_DEV_INITRD */ + + /* Initialize the MMU hardware */ + mmu_init_hw(); + + /* Map in all of RAM starting at CONFIG_KERNEL_START */ + mapin_ram(); + +#ifdef HIGHMEM_START_BOOL + ioremap_base = HIGHMEM_START; +#else + ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ +#endif /* CONFIG_HIGHMEM */ + ioremap_bot = ioremap_base; + + /* Initialize the context management stuff */ + mmu_context_init(); +} + +/* This is only called until mem_init is done. */ +void __init *early_get_page(void) +{ + void *p; + if (init_bootmem_done) { + p = alloc_bootmem_pages(PAGE_SIZE); + } else { + /* + * Mem start + 32MB -> here is limit + * because of mem mapping from head.S + */ + p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, + memory_start + 0x2000000)); + } + return p; +} +#endif /* CONFIG_MMU */ -- cgit v1.2.2 From 23098649e0f8861ea69fac62cf6ba721b83065dc Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:12 +0200 Subject: microblaze_mmu_v2: mmu.h update Signed-off-by: Michal Simek --- arch/microblaze/include/asm/mmu.h | 104 +++++++++++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/mmu.h b/arch/microblaze/include/asm/mmu.h index 0e0431d61635..66cad6a99d77 100644 --- a/arch/microblaze/include/asm/mmu.h +++ b/arch/microblaze/include/asm/mmu.h @@ -1,4 +1,6 @@ /* + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -9,11 +11,109 @@ #ifndef _ASM_MICROBLAZE_MMU_H #define _ASM_MICROBLAZE_MMU_H -#ifndef __ASSEMBLY__ +# ifndef CONFIG_MMU +# ifndef __ASSEMBLY__ typedef struct { struct vm_list_struct *vmlist; unsigned long end_brk; } mm_context_t; -#endif /* __ASSEMBLY__ */ +# endif /* __ASSEMBLY__ */ +# else /* CONFIG_MMU */ +# ifdef __KERNEL__ +# ifndef __ASSEMBLY__ +/* Default "unsigned long" context */ +typedef unsigned long mm_context_t; + +/* Hardware Page Table Entry */ +typedef struct _PTE { + unsigned long v:1; /* Entry is valid */ + unsigned long vsid:24; /* Virtual segment identifier */ + unsigned long h:1; /* Hash algorithm indicator */ + unsigned long api:6; /* Abbreviated page index */ + unsigned long rpn:20; /* Real (physical) page number */ + unsigned long :3; /* Unused */ + unsigned long r:1; /* Referenced */ + unsigned long c:1; /* Changed */ + unsigned long w:1; /* Write-thru cache mode */ + unsigned long i:1; /* Cache inhibited */ + unsigned long m:1; /* Memory coherence */ + unsigned long g:1; /* Guarded */ + unsigned long :1; /* Unused */ + unsigned long pp:2; /* Page protection */ +} PTE; + +/* Values for PP (assumes Ks=0, Kp=1) */ +# define PP_RWXX 0 /* Supervisor read/write, User none */ +# define PP_RWRX 1 /* Supervisor read/write, User read */ +# define PP_RWRW 2 /* Supervisor read/write, User read/write */ +# define PP_RXRX 3 /* Supervisor read, User read */ + +/* Segment Register */ +typedef struct _SEGREG { + unsigned long t:1; /* Normal or I/O type */ + unsigned long ks:1; /* Supervisor 'key' (normally 0) */ + unsigned long kp:1; /* User 'key' (normally 1) */ + unsigned long n:1; /* No-execute */ + unsigned long :4; /* Unused */ + unsigned long vsid:24; /* Virtual Segment Identifier */ +} SEGREG; + +extern void _tlbie(unsigned long va); /* invalidate a TLB entry */ +extern void _tlbia(void); /* invalidate all TLB entries */ +# endif /* __ASSEMBLY__ */ + +/* + * The MicroBlaze processor has a TLB architecture identical to PPC-40x. The + * instruction and data sides share a unified, 64-entry, semi-associative + * TLB which is maintained totally under software control. In addition, the + * instruction side has a hardware-managed, 2,4, or 8-entry, fully-associative + * TLB which serves as a first level to the shared TLB. These two TLBs are + * known as the UTLB and ITLB, respectively. + */ + +# define MICROBLAZE_TLB_SIZE 64 + +/* + * TLB entries are defined by a "high" tag portion and a "low" data + * portion. The data portion is 32-bits. + * + * TLB entries are managed entirely under software control by reading, + * writing, and searching using the MTS and MFS instructions. + */ + +# define TLB_LO 1 +# define TLB_HI 0 +# define TLB_DATA TLB_LO +# define TLB_TAG TLB_HI + +/* Tag portion */ +# define TLB_EPN_MASK 0xFFFFFC00 /* Effective Page Number */ +# define TLB_PAGESZ_MASK 0x00000380 +# define TLB_PAGESZ(x) (((x) & 0x7) << 7) +# define PAGESZ_1K 0 +# define PAGESZ_4K 1 +# define PAGESZ_16K 2 +# define PAGESZ_64K 3 +# define PAGESZ_256K 4 +# define PAGESZ_1M 5 +# define PAGESZ_4M 6 +# define PAGESZ_16M 7 +# define TLB_VALID 0x00000040 /* Entry is valid */ + +/* Data portion */ +# define TLB_RPN_MASK 0xFFFFFC00 /* Real Page Number */ +# define TLB_PERM_MASK 0x00000300 +# define TLB_EX 0x00000200 /* Instruction execution allowed */ +# define TLB_WR 0x00000100 /* Writes permitted */ +# define TLB_ZSEL_MASK 0x000000F0 +# define TLB_ZSEL(x) (((x) & 0xF) << 4) +# define TLB_ATTR_MASK 0x0000000F +# define TLB_W 0x00000008 /* Caching is write-through */ +# define TLB_I 0x00000004 /* Caching is inhibited */ +# define TLB_M 0x00000002 /* Memory is coherent */ +# define TLB_G 0x00000001 /* Memory is guarded from prefetch */ + +# endif /* __KERNEL__ */ +# endif /* CONFIG_MMU */ #endif /* _ASM_MICROBLAZE_MMU_H */ -- cgit v1.2.2 From 5de96121009f4de43ffeb7160109e23132278c07 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:13 +0200 Subject: microblaze_mmu_v2: Page fault handling high level - fault.c Signed-off-by: Michal Simek --- arch/microblaze/mm/fault.c | 304 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 304 insertions(+) create mode 100644 arch/microblaze/mm/fault.c (limited to 'arch') diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c new file mode 100644 index 000000000000..5e67cd1fab40 --- /dev/null +++ b/arch/microblaze/mm/fault.c @@ -0,0 +1,304 @@ +/* + * arch/microblaze/mm/fault.c + * + * Copyright (C) 2007 Xilinx, Inc. All rights reserved. + * + * Derived from "arch/ppc/mm/fault.c" + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/mm/fault.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Modified by Cort Dougan and Paul Mackerras. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of this + * archive for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_KGDB) +int debugger_kernel_faults = 1; +#endif + +static unsigned long pte_misses; /* updated by do_page_fault() */ +static unsigned long pte_errors; /* updated by do_page_fault() */ + +/* + * Check whether the instruction at regs->pc is a store using + * an update addressing form which will update r1. + */ +static int store_updates_sp(struct pt_regs *regs) +{ + unsigned int inst; + + if (get_user(inst, (unsigned int *)regs->pc)) + return 0; + /* check for 1 in the rD field */ + if (((inst >> 21) & 0x1f) != 1) + return 0; + /* check for store opcodes */ + if ((inst & 0xd0000000) == 0xd0000000) + return 1; + return 0; +} + + +/* + * bad_page_fault is called when we have a bad access from the kernel. + * It is called from do_page_fault above and from some of the procedures + * in traps.c. + */ +static void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +{ + const struct exception_table_entry *fixup; +/* MS: no context */ + /* Are we prepared to handle this fault? */ + fixup = search_exception_tables(regs->pc); + if (fixup) { + regs->pc = fixup->fixup; + return; + } + + /* kernel has accessed a bad area */ +#if defined(CONFIG_KGDB) + if (debugger_kernel_faults) + debugger(regs); +#endif + die("kernel access of bad area", regs, sig); +} + +/* + * The error_code parameter is ESR for a data fault, + * 0 for an instruction fault. + */ +void do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + siginfo_t info; + int code = SEGV_MAPERR; + int is_write = error_code & ESR_S; + int fault; + + regs->ear = address; + regs->esr = error_code; + + /* On a kernel SLB miss we can only check for a valid exception entry */ + if (kernel_mode(regs) && (address >= TASK_SIZE)) { + printk(KERN_WARNING "kernel task_size exceed"); + _exception(SIGSEGV, regs, code, address); + } + + /* for instr TLB miss and instr storage exception ESR_S is undefined */ + if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11) + is_write = 0; + +#if defined(CONFIG_KGDB) + if (debugger_fault_handler && regs->trap == 0x300) { + debugger_fault_handler(regs); + return; + } +#endif /* CONFIG_KGDB */ + + if (in_atomic() || mm == NULL) { + /* FIXME */ + if (kernel_mode(regs)) { + printk(KERN_EMERG + "Page fault in kernel mode - Oooou!!! pid %d\n", + current->pid); + _exception(SIGSEGV, regs, code, address); + return; + } + /* in_atomic() in user mode is really bad, + as is current->mm == NULL. */ + printk(KERN_EMERG "Page fault in user mode with " + "in_atomic(), mm = %p\n", mm); + printk(KERN_EMERG "r15 = %lx MSR = %lx\n", + regs->r15, regs->msr); + die("Weird page fault", regs, SIGSEGV); + } + + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunately, in the case of an + * erroneous fault occurring in a code path which already holds mmap_sem + * we will deadlock attempting to validate the fault against the + * address space. Luckily the kernel only validly references user + * space from well defined areas of code, which are listed in the + * exceptions table. + * + * As the vast majority of faults will be valid we will only perform + * the source reference check when there is a possibility of a deadlock. + * Attempt to lock the address space, if we cannot we then validate the + * source. If this is invalid we can skip the address space check, + * thus avoiding the deadlock. + */ + if (!down_read_trylock(&mm->mmap_sem)) { + if (kernel_mode(regs) && !search_exception_tables(regs->pc)) + goto bad_area_nosemaphore; + + down_read(&mm->mmap_sem); + } + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + + if (vma->vm_start <= address) + goto good_area; + + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + + if (!is_write) + goto bad_area; + + /* + * N.B. The ABI allows programs to access up to + * a few hundred bytes below the stack pointer (TBD). + * The kernel signal delivery code writes up to about 1.5kB + * below the stack pointer (r1) before decrementing it. + * The exec code can write slightly over 640kB to the stack + * before setting the user r1. Thus we allow the stack to + * expand to 1MB without further checks. + */ + if (address + 0x100000 < vma->vm_end) { + + /* get user regs even if this fault is in kernel mode */ + struct pt_regs *uregs = current->thread.regs; + if (uregs == NULL) + goto bad_area; + + /* + * A user-mode access to an address a long way below + * the stack pointer is only valid if the instruction + * is one which would update the stack pointer to the + * address accessed if the instruction completed, + * i.e. either stwu rs,n(r1) or stwux rs,r1,rb + * (or the byte, halfword, float or double forms). + * + * If we don't check this then any write to the area + * between the last mapped region and the stack will + * expand the stack rather than segfaulting. + */ + if (address + 2048 < uregs->r1 + && (kernel_mode(regs) || !store_updates_sp(regs))) + goto bad_area; + } + if (expand_stack(vma, address)) + goto bad_area; + +good_area: + code = SEGV_ACCERR; + + /* a write */ + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + /* a read */ + } else { + /* protection fault */ + if (error_code & 0x08000000) + goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ +survive: + fault = handle_mm_fault(mm, vma, address, is_write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; + up_read(&mm->mmap_sem); + /* + * keep track of tlb+htab misses that are good addrs but + * just need pte's created via handle_mm_fault() + * -- Cort + */ + pte_misses++; + return; + +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + pte_errors++; + + /* User mode accesses cause a SIGSEGV */ + if (user_mode(regs)) { + _exception(SIGSEGV, regs, code, address); +/* info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = code; + info.si_addr = (void *) address; + force_sig_info(SIGSEGV, &info, current);*/ + return; + } + + bad_page_fault(regs, address, SIGSEGV); + return; + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + if (current->pid == 1) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + up_read(&mm->mmap_sem); + printk(KERN_WARNING "VM: killing process %s\n", current->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + bad_page_fault(regs, address, SIGKILL); + return; + +do_sigbus: + up_read(&mm->mmap_sem); + if (user_mode(regs)) { + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return; + } + bad_page_fault(regs, address, SIGBUS); +} -- cgit v1.2.2 From fc34d1eb1ca09d3450508e2cf9cf511364c2c460 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:14 +0200 Subject: microblaze_mmu_v2: Context handling - mmu_context.c/h Signed-off-by: Michal Simek --- arch/microblaze/include/asm/mmu_context.h | 26 +---- arch/microblaze/include/asm/mmu_context_mm.h | 140 +++++++++++++++++++++++++++ arch/microblaze/include/asm/mmu_context_no.h | 23 +++++ arch/microblaze/mm/mmu_context.c | 70 ++++++++++++++ 4 files changed, 238 insertions(+), 21 deletions(-) create mode 100644 arch/microblaze/include/asm/mmu_context_mm.h create mode 100644 arch/microblaze/include/asm/mmu_context_no.h create mode 100644 arch/microblaze/mm/mmu_context.c (limited to 'arch') diff --git a/arch/microblaze/include/asm/mmu_context.h b/arch/microblaze/include/asm/mmu_context.h index 150ca01b74ba..385fed16bbfb 100644 --- a/arch/microblaze/include/asm/mmu_context.h +++ b/arch/microblaze/include/asm/mmu_context.h @@ -1,21 +1,5 @@ -/* - * Copyright (C) 2006 Atmark Techno, Inc. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#ifndef _ASM_MICROBLAZE_MMU_CONTEXT_H -#define _ASM_MICROBLAZE_MMU_CONTEXT_H - -# define init_new_context(tsk, mm) ({ 0; }) - -# define enter_lazy_tlb(mm, tsk) do {} while (0) -# define change_mm_context(old, ctx, _pml4) do {} while (0) -# define destroy_context(mm) do {} while (0) -# define deactivate_mm(tsk, mm) do {} while (0) -# define switch_mm(prev, next, tsk) do {} while (0) -# define activate_mm(prev, next) do {} while (0) - -#endif /* _ASM_MICROBLAZE_MMU_CONTEXT_H */ +#ifdef CONFIG_MMU +# include "mmu_context_mm.h" +#else +# include "mmu_context_no.h" +#endif diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h new file mode 100644 index 000000000000..3e5c254e8d1c --- /dev/null +++ b/arch/microblaze/include/asm/mmu_context_mm.h @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix + * Copyright (C) 2006 Atmark Techno, Inc. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#ifndef _ASM_MICROBLAZE_MMU_CONTEXT_H +#define _ASM_MICROBLAZE_MMU_CONTEXT_H + +#include +#include +#include +#include + +# ifdef __KERNEL__ +/* + * This function defines the mapping from contexts to VSIDs (virtual + * segment IDs). We use a skew on both the context and the high 4 bits + * of the 32-bit virtual address (the "effective segment ID") in order + * to spread out the entries in the MMU hash table. + */ +# define CTX_TO_VSID(ctx, va) (((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \ + & 0xffffff) + +/* + MicroBlaze has 256 contexts, so we can just rotate through these + as a way of "switching" contexts. If the TID of the TLB is zero, + the PID/TID comparison is disabled, so we can use a TID of zero + to represent all kernel pages as shared among all contexts. + */ + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +} + +# define NO_CONTEXT 256 +# define LAST_CONTEXT 255 +# define FIRST_CONTEXT 1 + +/* + * Set the current MMU context. + * This is done byloading up the segment registers for the user part of the + * address space. + * + * Since the PGD is immediately available, it is much faster to simply + * pass this along as a second parameter, which is required for 8xx and + * can be used for debugging on all processors (if you happen to have + * an Abatron). + */ +extern void set_context(mm_context_t context, pgd_t *pgd); + +/* + * Bitmap of contexts in use. + * The size of this bitmap is LAST_CONTEXT + 1 bits. + */ +extern unsigned long context_map[]; + +/* + * This caches the next context number that we expect to be free. + * Its use is an optimization only, we can't rely on this context + * number to be free, but it usually will be. + */ +extern mm_context_t next_mmu_context; + +/* + * Since we don't have sufficient contexts to give one to every task + * that could be in the system, we need to be able to steal contexts. + * These variables support that. + */ +extern atomic_t nr_free_contexts; +extern struct mm_struct *context_mm[LAST_CONTEXT+1]; +extern void steal_context(void); + +/* + * Get a new mmu context for the address space described by `mm'. + */ +static inline void get_mmu_context(struct mm_struct *mm) +{ + mm_context_t ctx; + + if (mm->context != NO_CONTEXT) + return; + while (atomic_dec_if_positive(&nr_free_contexts) < 0) + steal_context(); + ctx = next_mmu_context; + while (test_and_set_bit(ctx, context_map)) { + ctx = find_next_zero_bit(context_map, LAST_CONTEXT+1, ctx); + if (ctx > LAST_CONTEXT) + ctx = 0; + } + next_mmu_context = (ctx + 1) & LAST_CONTEXT; + mm->context = ctx; + context_mm[ctx] = mm; +} + +/* + * Set up the context for a new address space. + */ +# define init_new_context(tsk, mm) (((mm)->context = NO_CONTEXT), 0) + +/* + * We're finished using the context for an address space. + */ +static inline void destroy_context(struct mm_struct *mm) +{ + if (mm->context != NO_CONTEXT) { + clear_bit(mm->context, context_map); + mm->context = NO_CONTEXT; + atomic_inc(&nr_free_contexts); + } +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + tsk->thread.pgdir = next->pgd; + get_mmu_context(next); + set_context(next->context, next->pgd); +} + +/* + * After we have set current->mm to a new value, this activates + * the context for the new mm so we see the new mappings. + */ +static inline void activate_mm(struct mm_struct *active_mm, + struct mm_struct *mm) +{ + current->thread.pgdir = mm->pgd; + get_mmu_context(mm); + set_context(mm->context, mm->pgd); +} + +extern void mmu_context_init(void); + +# endif /* __KERNEL__ */ +#endif /* _ASM_MICROBLAZE_MMU_CONTEXT_H */ diff --git a/arch/microblaze/include/asm/mmu_context_no.h b/arch/microblaze/include/asm/mmu_context_no.h new file mode 100644 index 000000000000..ba5567190154 --- /dev/null +++ b/arch/microblaze/include/asm/mmu_context_no.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix + * Copyright (C) 2006 Atmark Techno, Inc. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#ifndef _ASM_MICROBLAZE_MMU_CONTEXT_H +#define _ASM_MICROBLAZE_MMU_CONTEXT_H + +# define init_new_context(tsk, mm) ({ 0; }) + +# define enter_lazy_tlb(mm, tsk) do {} while (0) +# define change_mm_context(old, ctx, _pml4) do {} while (0) +# define destroy_context(mm) do {} while (0) +# define deactivate_mm(tsk, mm) do {} while (0) +# define switch_mm(prev, next, tsk) do {} while (0) +# define activate_mm(prev, next) do {} while (0) + +#endif /* _ASM_MICROBLAZE_MMU_CONTEXT_H */ diff --git a/arch/microblaze/mm/mmu_context.c b/arch/microblaze/mm/mmu_context.c new file mode 100644 index 000000000000..26ff82f4fa8f --- /dev/null +++ b/arch/microblaze/mm/mmu_context.c @@ -0,0 +1,70 @@ +/* + * This file contains the routines for handling the MMU. + * + * Copyright (C) 2007 Xilinx, Inc. All rights reserved. + * + * Derived from arch/ppc/mm/4xx_mmu.c: + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include + +#include +#include + +mm_context_t next_mmu_context; +unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; +atomic_t nr_free_contexts; +struct mm_struct *context_mm[LAST_CONTEXT+1]; + +/* + * Initialize the context management stuff. + */ +void __init mmu_context_init(void) +{ + /* + * The use of context zero is reserved for the kernel. + * This code assumes FIRST_CONTEXT < 32. + */ + context_map[0] = (1 << FIRST_CONTEXT) - 1; + next_mmu_context = FIRST_CONTEXT; + atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); +} + +/* + * Steal a context from a task that has one at the moment. + * + * This isn't an LRU system, it just frees up each context in + * turn (sort-of pseudo-random replacement :). This would be the + * place to implement an LRU scheme if anyone were motivated to do it. + */ +void steal_context(void) +{ + struct mm_struct *mm; + + /* free up context `next_mmu_context' */ + /* if we shouldn't free context 0, don't... */ + if (next_mmu_context < FIRST_CONTEXT) + next_mmu_context = FIRST_CONTEXT; + mm = context_mm[next_mmu_context]; + flush_tlb_mm(mm); + destroy_context(mm); +} -- cgit v1.2.2 From 15902bf63c8332946e5a1f48a72e3ae22874b11b Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:15 +0200 Subject: microblaze_mmu_v2: Page table - ioremap - pgtable.c/h, section update Signed-off-by: Michal Simek --- arch/microblaze/include/asm/pgtable.h | 536 +++++++++++++++++++++++++++++++++ arch/microblaze/include/asm/sections.h | 3 + arch/microblaze/mm/pgtable.c | 286 ++++++++++++++++++ 3 files changed, 825 insertions(+) create mode 100644 arch/microblaze/mm/pgtable.c (limited to 'arch') diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 254fd4ba733b..4c57a586a989 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -1,4 +1,6 @@ /* + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -14,6 +16,8 @@ #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) +#ifndef CONFIG_MMU + #define pgd_present(pgd) (1) /* pages are always present on non MMU */ #define pgd_none(pgd) (0) #define pgd_bad(pgd) (0) @@ -47,6 +51,538 @@ static inline int pte_file(pte_t pte) { return 0; } #define arch_enter_lazy_cpu_mode() do {} while (0) +#else /* CONFIG_MMU */ + +#include + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#include +#include +#include /* For TASK_SIZE */ +#include +#include + +#define FIRST_USER_ADDRESS 0 + +extern unsigned long va_to_phys(unsigned long address); +extern pte_t *va_to_pte(unsigned long address); +extern unsigned long ioremap_bot, ioremap_base; + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ + +static inline int pte_special(pte_t pte) { return 0; } + +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } + +/* Start and end of the vmalloc area. */ +/* Make sure to map the vmalloc area above the pinned kernel memory area + of 32Mb. */ +#define VMALLOC_START (CONFIG_KERNEL_START + \ + max(32 * 1024 * 1024UL, memory_size)) +#define VMALLOC_END ioremap_bot +#define VMALLOC_VMADDR(x) ((unsigned long)(x)) + +#endif /* __ASSEMBLY__ */ + +/* + * The MicroBlaze MMU is identical to the PPC-40x MMU, and uses a hash + * table containing PTEs, together with a set of 16 segment registers, to + * define the virtual to physical address mapping. + * + * We use the hash table as an extended TLB, i.e. a cache of currently + * active mappings. We maintain a two-level page table tree, much + * like that used by the i386, for the sake of the Linux memory + * management code. Low-level assembler code in hashtable.S + * (procedure hash_page) is responsible for extracting ptes from the + * tree and putting them into the hash table when necessary, and + * updating the accessed and modified bits in the page table tree. + */ + +/* + * The MicroBlaze processor has a TLB architecture identical to PPC-40x. The + * instruction and data sides share a unified, 64-entry, semi-associative + * TLB which is maintained totally under software control. In addition, the + * instruction side has a hardware-managed, 2,4, or 8-entry, fully-associative + * TLB which serves as a first level to the shared TLB. These two TLBs are + * known as the UTLB and ITLB, respectively (see "mmu.h" for definitions). + */ + +/* + * The normal case is that PTEs are 32-bits and we have a 1-page + * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus + * + */ + +/* PMD_SHIFT determines the size of the area mapped by the PTE pages */ +#define PMD_SHIFT (PAGE_SHIFT + PTE_SHIFT) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* PGDIR_SHIFT determines what a top-level page table entry can map */ +#define PGDIR_SHIFT PMD_SHIFT +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * entries per page directory level: our page-table tree is two-level, so + * we don't really have any PMD directory. + */ +#define PTRS_PER_PTE (1 << PTE_SHIFT) +#define PTRS_PER_PMD 1 +#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) + +#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) +#define FIRST_USER_PGD_NR 0 + +#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) + +#define pte_ERROR(e) \ + printk(KERN_ERR "%s:%d: bad pte "PTE_FMT".\n", \ + __FILE__, __LINE__, pte_val(e)) +#define pmd_ERROR(e) \ + printk(KERN_ERR "%s:%d: bad pmd %08lx.\n", \ + __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \ + __FILE__, __LINE__, pgd_val(e)) + +/* + * Bits in a linux-style PTE. These match the bits in the + * (hardware-defined) PTE as closely as possible. + */ + +/* There are several potential gotchas here. The hardware TLBLO + * field looks like this: + * + * 0 1 2 3 4 ... 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + * RPN..................... 0 0 EX WR ZSEL....... W I M G + * + * Where possible we make the Linux PTE bits match up with this + * + * - bits 20 and 21 must be cleared, because we use 4k pages (4xx can + * support down to 1k pages), this is done in the TLBMiss exception + * handler. + * - We use only zones 0 (for kernel pages) and 1 (for user pages) + * of the 16 available. Bit 24-26 of the TLB are cleared in the TLB + * miss handler. Bit 27 is PAGE_USER, thus selecting the correct + * zone. + * - PRESENT *must* be in the bottom two bits because swap cache + * entries use the top 30 bits. Because 4xx doesn't support SMP + * anyway, M is irrelevant so we borrow it for PAGE_PRESENT. Bit 30 + * is cleared in the TLB miss handler before the TLB entry is loaded. + * - All other bits of the PTE are loaded into TLBLO without + * * modification, leaving us only the bits 20, 21, 24, 25, 26, 30 for + * software PTE bits. We actually use use bits 21, 24, 25, and + * 30 respectively for the software bits: ACCESSED, DIRTY, RW, and + * PRESENT. + */ + +/* Definitions for MicroBlaze. */ +#define _PAGE_GUARDED 0x001 /* G: page is guarded from prefetch */ +#define _PAGE_PRESENT 0x002 /* software: PTE contains a translation */ +#define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */ +#define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */ +#define _PAGE_USER 0x010 /* matches one of the zone permission bits */ +#define _PAGE_RW 0x040 /* software: Writes permitted */ +#define _PAGE_DIRTY 0x080 /* software: dirty page */ +#define _PAGE_HWWRITE 0x100 /* hardware: Dirty & RW, set in exception */ +#define _PAGE_HWEXEC 0x200 /* hardware: EX permission */ +#define _PAGE_ACCESSED 0x400 /* software: R: page referenced */ +#define _PMD_PRESENT PAGE_MASK + +/* + * Some bits are unused... + */ +#ifndef _PAGE_HASHPTE +#define _PAGE_HASHPTE 0 +#endif +#ifndef _PTE_NONE_MASK +#define _PTE_NONE_MASK 0 +#endif +#ifndef _PAGE_SHARED +#define _PAGE_SHARED 0 +#endif +#ifndef _PAGE_HWWRITE +#define _PAGE_HWWRITE 0 +#endif +#ifndef _PAGE_HWEXEC +#define _PAGE_HWEXEC 0 +#endif +#ifndef _PAGE_EXEC +#define _PAGE_EXEC 0 +#endif + +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) + +/* + * Note: the _PAGE_COHERENT bit automatically gets set in the hardware + * PTE if CONFIG_SMP is defined (hash_page does this); there is no need + * to have it in the Linux PTE, and in fact the bit could be reused for + * another purpose. -- paulus. + */ +#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED) +#define _PAGE_WRENABLE (_PAGE_RW | _PAGE_DIRTY | _PAGE_HWWRITE) + +#define _PAGE_KERNEL \ + (_PAGE_BASE | _PAGE_WRENABLE | _PAGE_SHARED | _PAGE_HWEXEC) + +#define _PAGE_IO (_PAGE_KERNEL | _PAGE_NO_CACHE | _PAGE_GUARDED) + +#define PAGE_NONE __pgprot(_PAGE_BASE) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) +#define PAGE_SHARED_X \ + __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) + +#define PAGE_KERNEL __pgprot(_PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_SHARED) +#define PAGE_KERNEL_CI __pgprot(_PAGE_IO) + +/* + * We consider execute permission the same as read. + * Also, write permissions imply read permissions. + */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY_X +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY_X +#define __P100 PAGE_READONLY +#define __P101 PAGE_READONLY_X +#define __P110 PAGE_COPY +#define __P111 PAGE_COPY_X + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY_X +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED_X +#define __S100 PAGE_READONLY +#define __S101 PAGE_READONLY_X +#define __S110 PAGE_SHARED +#define __S111 PAGE_SHARED_X + +#ifndef __ASSEMBLY__ +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[1024]; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +#endif /* __ASSEMBLY__ */ + +#define pte_none(pte) ((pte_val(pte) & ~_PTE_NONE_MASK) == 0) +#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT) +#define pte_clear(mm, addr, ptep) \ + do { set_pte_at((mm), (addr), (ptep), __pte(0)); } while (0) + +#define pmd_none(pmd) (!pmd_val(pmd)) +#define pmd_bad(pmd) ((pmd_val(pmd) & _PMD_PRESENT) == 0) +#define pmd_present(pmd) ((pmd_val(pmd) & _PMD_PRESENT) != 0) +#define pmd_clear(pmdp) do { pmd_val(*(pmdp)) = 0; } while (0) + +#define pte_page(x) (mem_map + (unsigned long) \ + ((pte_val(x) - memory_start) >> PAGE_SHIFT)) +#define PFN_SHIFT_OFFSET (PAGE_SHIFT) + +#define pte_pfn(x) (pte_val(x) >> PFN_SHIFT_OFFSET) + +#define pfn_pte(pfn, prot) \ + __pte(((pte_basic_t)(pfn) << PFN_SHIFT_OFFSET) | pgprot_val(prot)) + +#ifndef __ASSEMBLY__ +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pgd is never bad, and a pmd always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +#define pgd_clear(xp) do { } while (0) +#define pgd_page(pgd) \ + ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_USER; } +static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } +static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +/* FIXME */ +static inline int pte_file(pte_t pte) { return 0; } + +static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } +static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } + +static inline pte_t pte_rdprotect(pte_t pte) \ + { pte_val(pte) &= ~_PAGE_USER; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) \ + { pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); return pte; } +static inline pte_t pte_exprotect(pte_t pte) \ + { pte_val(pte) &= ~_PAGE_EXEC; return pte; } +static inline pte_t pte_mkclean(pte_t pte) \ + { pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; } +static inline pte_t pte_mkold(pte_t pte) \ + { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } + +static inline pte_t pte_mkread(pte_t pte) \ + { pte_val(pte) |= _PAGE_USER; return pte; } +static inline pte_t pte_mkexec(pte_t pte) \ + { pte_val(pte) |= _PAGE_USER | _PAGE_EXEC; return pte; } +static inline pte_t pte_mkwrite(pte_t pte) \ + { pte_val(pte) |= _PAGE_RW; return pte; } +static inline pte_t pte_mkdirty(pte_t pte) \ + { pte_val(pte) |= _PAGE_DIRTY; return pte; } +static inline pte_t pte_mkyoung(pte_t pte) \ + { pte_val(pte) |= _PAGE_ACCESSED; return pte; } + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +static inline pte_t mk_pte_phys(phys_addr_t physpage, pgprot_t pgprot) +{ + pte_t pte; + pte_val(pte) = physpage | pgprot_val(pgprot); + return pte; +} + +#define mk_pte(page, pgprot) \ +({ \ + pte_t pte; \ + pte_val(pte) = (((page - mem_map) << PAGE_SHIFT) + memory_start) | \ + pgprot_val(pgprot); \ + pte; \ +}) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); + return pte; +} + +/* + * Atomic PTE updates. + * + * pte_update clears and sets bit atomically, and returns + * the old pte value. + * The ((unsigned long)(p+1) - 4) hack is to get to the least-significant + * 32 bits of the PTE regardless of whether PTEs are 32 or 64 bits. + */ +static inline unsigned long pte_update(pte_t *p, unsigned long clr, + unsigned long set) +{ + unsigned long old, tmp, msr; + + __asm__ __volatile__("\ + msrclr %2, 0x2\n\ + nop\n\ + lw %0, %4, r0\n\ + andn %1, %0, %5\n\ + or %1, %1, %6\n\ + sw %1, %4, r0\n\ + mts rmsr, %2\n\ + nop" + : "=&r" (old), "=&r" (tmp), "=&r" (msr), "=m" (*p) + : "r" ((unsigned long)(p+1) - 4), "r" (clr), "r" (set), "m" (*p) + : "cc"); + + return old; +} + +/* + * set_pte stores a linux PTE into the linux page table. + */ +static inline void set_pte(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + *ptep = pte; +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + *ptep = pte; +} + +static inline int ptep_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0; +} + +static inline int ptep_test_and_clear_dirty(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return (pte_update(ptep, \ + (_PAGE_DIRTY | _PAGE_HWWRITE), 0) & _PAGE_DIRTY) != 0; +} + +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0)); +} + +/*static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), 0); +}*/ + +static inline void ptep_mkdirty(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_update(ptep, 0, _PAGE_DIRTY); +} + +/*#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)*/ + +/* Convert pmd entry to page */ +/* our pmd entry is an effective address of pte table*/ +/* returns effective address of the pmd entry*/ +#define pmd_page_kernel(pmd) ((unsigned long) (pmd_val(pmd) & PAGE_MASK)) + +/* returns struct *page of the pmd entry*/ +#define pmd_page(pmd) (pfn_to_page(__pa(pmd_val(pmd)) >> PAGE_SHIFT)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* to find an entry in a page-table-directory */ +#define pgd_index(address) ((address) >> PGDIR_SHIFT) +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) + +/* Find an entry in the second-level page table.. */ +static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address) +{ + return (pmd_t *) dir; +} + +/* Find an entry in the third-level page table.. */ +#define pte_index(address) \ + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir, addr) \ + ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr)) +#define pte_offset_map(dir, addr) \ + ((pte_t *) kmap_atomic(pmd_page(*(dir)), KM_PTE0) + pte_index(addr)) +#define pte_offset_map_nested(dir, addr) \ + ((pte_t *) kmap_atomic(pmd_page(*(dir)), KM_PTE1) + pte_index(addr)) + +#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) +#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) + +/* Encode and decode a nonlinear file mapping entry */ +#define PTE_FILE_MAX_BITS 29 +#define pte_to_pgoff(pte) (pte_val(pte) >> 3) +#define pgoff_to_pte(off) ((pte_t) { ((off) << 3) }) + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +/* + * When flushing the tlb entry for a page, we also need to flush the hash + * table entry. flush_hash_page is assembler (for speed) in hashtable.S. + */ +extern int flush_hash_page(unsigned context, unsigned long va, pte_t *ptep); + +/* Add an HPTE to the hash table */ +extern void add_hash_page(unsigned context, unsigned long va, pte_t *ptep); + +/* + * Encode and decode a swap entry. + * Note that the bits we use in a PTE for representing a swap entry + * must not include the _PAGE_PRESENT bit, or the _PAGE_HASHPTE bit + * (if used). -- paulus + */ +#define __swp_type(entry) ((entry).val & 0x3f) +#define __swp_offset(entry) ((entry).val >> 6) +#define __swp_entry(type, offset) \ + ((swp_entry_t) { (type) | ((offset) << 6) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 2 }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 2 }) + + +/* CONFIG_APUS */ +/* For virtual address to physical address conversion */ +extern void cache_clear(__u32 addr, int length); +extern void cache_push(__u32 addr, int length); +extern int mm_end_of_chunk(unsigned long addr, int len); +extern unsigned long iopa(unsigned long addr); +/* extern unsigned long mm_ptov(unsigned long addr) \ + __attribute__ ((const)); TBD */ + +/* Values for nocacheflag and cmode */ +/* These are not used by the APUS kernel_map, but prevents + * compilation errors. + */ +#define IOMAP_FULL_CACHING 0 +#define IOMAP_NOCACHE_SER 1 +#define IOMAP_NOCACHE_NONSER 2 +#define IOMAP_NO_COPYBACK 3 + +/* + * Map some physical address range into the kernel address space. + */ +extern unsigned long kernel_map(unsigned long paddr, unsigned long size, + int nocacheflag, unsigned long *memavailp); + +/* + * Set cache mode of (kernel space) address range. + */ +extern void kernel_set_cachemode(unsigned long address, unsigned long size, + unsigned int cmode); + +/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ +#define kern_addr_valid(addr) (1) + +#define io_remap_page_range remap_page_range + +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + +void do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code); + +void __init io_block_mapping(unsigned long virt, phys_addr_t phys, + unsigned int size, int flags); + +void __init adjust_total_lowmem(void); +void mapin_ram(void); +int map_page(unsigned long va, phys_addr_t pa, int flags); + +extern int mem_init_done; +extern unsigned long ioremap_base; +extern unsigned long ioremap_bot; + +asmlinkage void __init mmu_init(void); + +void __init *early_get_page(void); + +void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle); +void consistent_free(void *vaddr); +void consistent_sync(void *vaddr, size_t size, int direction); +void consistent_sync_page(struct page *page, unsigned long offset, + size_t size, int direction); +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* CONFIG_MMU */ + #ifndef __ASSEMBLY__ #include diff --git a/arch/microblaze/include/asm/sections.h b/arch/microblaze/include/asm/sections.h index 8434a43e5421..4487e150b455 100644 --- a/arch/microblaze/include/asm/sections.h +++ b/arch/microblaze/include/asm/sections.h @@ -1,4 +1,6 @@ /* + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -14,6 +16,7 @@ # ifndef __ASSEMBLY__ extern char _ssbss[], _esbss[]; extern unsigned long __ivt_start[], __ivt_end[]; +extern char _etext[], _stext[]; # ifdef CONFIG_MTD_UCLINUX extern char *_ebss; diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c new file mode 100644 index 000000000000..46c4ca5d15c5 --- /dev/null +++ b/arch/microblaze/mm/pgtable.c @@ -0,0 +1,286 @@ +/* + * This file contains the routines setting up the linux page tables. + * + * Copyright (C) 2008 Michal Simek + * Copyright (C) 2008 PetaLogix + * + * Copyright (C) 2007 Xilinx, Inc. All rights reserved. + * + * Derived from arch/ppc/mm/pgtable.c: + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of this + * archive for more details. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define flush_HPTE(X, va, pg) _tlbie(va) + +unsigned long ioremap_base; +unsigned long ioremap_bot; + +/* The maximum lowmem defaults to 768Mb, but this can be configured to + * another value. + */ +#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE + +#ifndef CONFIG_SMP +struct pgtable_cache_struct quicklists; +#endif + +static void __iomem *__ioremap(phys_addr_t addr, unsigned long size, + unsigned long flags) +{ + unsigned long v, i; + phys_addr_t p; + int err; + + /* + * Choose an address to map it to. + * Once the vmalloc system is running, we use it. + * Before then, we use space going down from ioremap_base + * (ioremap_bot records where we're up to). + */ + p = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - p; + + /* + * Don't allow anybody to remap normal RAM that we're using. + * mem_init() sets high_memory so only do the check after that. + * + * However, allow remap of rootfs: TBD + */ + if (mem_init_done && + p >= memory_start && p < virt_to_phys(high_memory) && + !(p >= virt_to_phys((unsigned long)&__bss_stop) && + p < virt_to_phys((unsigned long)__bss_stop))) { + printk(KERN_WARNING "__ioremap(): phys addr "PTE_FMT + " is RAM lr %p\n", (unsigned long)p, + __builtin_return_address(0)); + return NULL; + } + + if (size == 0) + return NULL; + + /* + * Is it already mapped? If the whole area is mapped then we're + * done, otherwise remap it since we want to keep the virt addrs for + * each request contiguous. + * + * We make the assumption here that if the bottom and top + * of the range we want are mapped then it's mapped to the + * same virt address (and this is contiguous). + * -- Cort + */ + + if (mem_init_done) { + struct vm_struct *area; + area = get_vm_area(size, VM_IOREMAP); + if (area == NULL) + return NULL; + v = VMALLOC_VMADDR(area->addr); + } else { + v = (ioremap_bot -= size); + } + + if ((flags & _PAGE_PRESENT) == 0) + flags |= _PAGE_KERNEL; + if (flags & _PAGE_NO_CACHE) + flags |= _PAGE_GUARDED; + + err = 0; + for (i = 0; i < size && err == 0; i += PAGE_SIZE) + err = map_page(v + i, p + i, flags); + if (err) { + if (mem_init_done) + vfree((void *)v); + return NULL; + } + + return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK)); +} + +void __iomem *ioremap(phys_addr_t addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} +EXPORT_SYMBOL(ioremap); + +void iounmap(void *addr) +{ + if (addr > high_memory && (unsigned long) addr < ioremap_bot) + vfree((void *) (PAGE_MASK & (unsigned long) addr)); +} +EXPORT_SYMBOL(iounmap); + + +int map_page(unsigned long va, phys_addr_t pa, int flags) +{ + pmd_t *pd; + pte_t *pg; + int err = -ENOMEM; + /* spin_lock(&init_mm.page_table_lock); */ + /* Use upper 10 bits of VA to index the first level map */ + pd = pmd_offset(pgd_offset_k(va), va); + /* Use middle 10 bits of VA to index the second-level map */ + pg = pte_alloc_kernel(pd, va); /* from powerpc - pgtable.c */ + /* pg = pte_alloc_kernel(&init_mm, pd, va); */ + + if (pg != NULL) { + err = 0; + set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, + __pgprot(flags))); + if (mem_init_done) + flush_HPTE(0, va, pmd_val(*pd)); + /* flush_HPTE(0, va, pg); */ + + } + /* spin_unlock(&init_mm.page_table_lock); */ + return err; +} + +void __init adjust_total_lowmem(void) +{ +/* TBD */ +#if 0 + unsigned long max_low_mem = MAX_LOW_MEM; + + if (total_lowmem > max_low_mem) { + total_lowmem = max_low_mem; +#ifndef CONFIG_HIGHMEM + printk(KERN_INFO "Warning, memory limited to %ld Mb, use " + "CONFIG_HIGHMEM to reach %ld Mb\n", + max_low_mem >> 20, total_memory >> 20); + total_memory = total_lowmem; +#endif /* CONFIG_HIGHMEM */ + } +#endif +} + +static void show_tmem(unsigned long tmem) +{ + volatile unsigned long a; + a = a + tmem; +} + +/* + * Map in all of physical memory starting at CONFIG_KERNEL_START. + */ +void __init mapin_ram(void) +{ + unsigned long v, p, s, f; + + v = CONFIG_KERNEL_START; + p = memory_start; + show_tmem(memory_size); + for (s = 0; s < memory_size; s += PAGE_SIZE) { + f = _PAGE_PRESENT | _PAGE_ACCESSED | + _PAGE_SHARED | _PAGE_HWEXEC; + if ((char *) v < _stext || (char *) v >= _etext) + f |= _PAGE_WRENABLE; + else + /* On the MicroBlaze, no user access + forces R/W kernel access */ + f |= _PAGE_USER; + map_page(v, p, f); + v += PAGE_SIZE; + p += PAGE_SIZE; + } +} + +/* is x a power of 2? */ +#define is_power_of_2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0)) + +/* + * Set up a mapping for a block of I/O. + * virt, phys, size must all be page-aligned. + * This should only be called before ioremap is called. + */ +void __init io_block_mapping(unsigned long virt, phys_addr_t phys, + unsigned int size, int flags) +{ + int i; + + if (virt > CONFIG_KERNEL_START && virt < ioremap_bot) + ioremap_bot = ioremap_base = virt; + + /* Put it in the page tables. */ + for (i = 0; i < size; i += PAGE_SIZE) + map_page(virt + i, phys + i, flags); +} + +/* Scan the real Linux page tables and return a PTE pointer for + * a virtual address in a context. + * Returns true (1) if PTE was found, zero otherwise. The pointer to + * the PTE pointer is unmodified if PTE is not found. + */ +static int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int retval = 0; + + pgd = pgd_offset(mm, addr & PAGE_MASK); + if (pgd) { + pmd = pmd_offset(pgd, addr & PAGE_MASK); + if (pmd_present(*pmd)) { + pte = pte_offset_kernel(pmd, addr & PAGE_MASK); + if (pte) { + retval = 1; + *ptep = pte; + } + } + } + return retval; +} + +/* Find physical address for this virtual address. Normally used by + * I/O functions, but anyone can call it. + */ +unsigned long iopa(unsigned long addr) +{ + unsigned long pa; + + pte_t *pte; + struct mm_struct *mm; + + /* Allow mapping of user addresses (within the thread) + * for DMA if necessary. + */ + if (addr < TASK_SIZE) + mm = current->mm; + else + mm = &init_mm; + + pa = 0; + if (get_pteptr(mm, addr, &pte)) + pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); + + return pa; +} -- cgit v1.2.2 From dc95be1f7188f0718ac922b6b6b72406c294d250 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:16 +0200 Subject: microblaze_mmu_v2: io.h MMU update Signed-off-by: Michal Simek --- arch/microblaze/include/asm/io.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h index 8b5853ee6b5c..5c173424d074 100644 --- a/arch/microblaze/include/asm/io.h +++ b/arch/microblaze/include/asm/io.h @@ -1,4 +1,6 @@ /* + * Copyright (C) 2007-2009 Michal Simek + * Copyright (C) 2007-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -12,6 +14,9 @@ #include #include #include +#include +#include /* Get struct page {...} */ + #define IO_SPACE_LIMIT (0xFFFFFFFF) @@ -112,6 +117,30 @@ static inline void writel(unsigned int v, volatile void __iomem *addr) #define memcpy_fromio(a, b, c) memcpy((a), (void *)(b), (c)) #define memcpy_toio(a, b, c) memcpy((void *)(a), (b), (c)) +#ifdef CONFIG_MMU + +#define mm_ptov(addr) ((void *)__phys_to_virt(addr)) +#define mm_vtop(addr) ((unsigned long)__virt_to_phys(addr)) +#define phys_to_virt(addr) ((void *)__phys_to_virt(addr)) +#define virt_to_phys(addr) ((unsigned long)__virt_to_phys(addr)) +#define virt_to_bus(addr) ((unsigned long)__virt_to_phys(addr)) + +#define __page_address(page) \ + (PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT)) +#define page_to_phys(page) virt_to_phys((void *)__page_address(page)) +#define page_to_bus(page) (page_to_phys(page)) +#define bus_to_virt(addr) (phys_to_virt(addr)) + +extern void iounmap(void *addr); +/*extern void *__ioremap(phys_addr_t address, unsigned long size, + unsigned long flags);*/ +extern void __iomem *ioremap(phys_addr_t address, unsigned long size); +#define ioremap_writethrough(addr, size) ioremap((addr), (size)) +#define ioremap_nocache(addr, size) ioremap((addr), (size)) +#define ioremap_fullcache(addr, size) ioremap((addr), (size)) + +#else /* CONFIG_MMU */ + /** * virt_to_phys - map virtual addresses to physical * @address: address to remap @@ -160,6 +189,8 @@ static inline void __iomem *__ioremap(phys_addr_t address, unsigned long size, #define iounmap(addr) ((void)0) #define ioremap_nocache(physaddr, size) ioremap(physaddr, size) +#endif /* CONFIG_MMU */ + /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem * access -- cgit v1.2.2 From 1f84e1ea0e87ad659cd6f6a6285d50c73a8d1a24 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:17 +0200 Subject: microblaze_mmu_v2: pgalloc.h and page.h Signed-off-by: Michal Simek --- arch/microblaze/include/asm/page.h | 166 +++++++++++++++++++++-------- arch/microblaze/include/asm/pgalloc.h | 191 ++++++++++++++++++++++++++++++++++ 2 files changed, 314 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index 7238dcfcc517..210e584974f7 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -1,6 +1,8 @@ /* - * Copyright (C) 2008 Michal Simek - * Copyright (C) 2008 PetaLogix + * VM ops + * + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * Changes for MMU support: * Copyright (C) 2007 Xilinx, Inc. All rights reserved. @@ -15,14 +17,15 @@ #include #include +#include + +#ifdef __KERNEL__ /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT (12) -#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#ifdef __KERNEL__ - #ifndef __ASSEMBLY__ #define PAGE_UP(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1))) @@ -35,6 +38,7 @@ /* align addr on a size boundary - adjust address up if needed */ #define _ALIGN(addr, size) _ALIGN_UP(addr, size) +#ifndef CONFIG_MMU /* * PAGE_OFFSET -- the first address of the first page of memory. When not * using MMU this corresponds to the first free page in physical memory (aligned @@ -43,15 +47,44 @@ extern unsigned int __page_offset; #define PAGE_OFFSET __page_offset -#define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) -#define get_user_page(vaddr) __get_free_page(GFP_KERNEL) -#define free_user_page(page, addr) free_page(addr) +#else /* CONFIG_MMU */ -#define clear_page(pgaddr) memset((pgaddr), 0, PAGE_SIZE) +/* + * PAGE_OFFSET -- the first address of the first page of memory. With MMU + * it is set to the kernel start address (aligned on a page boundary). + * + * CONFIG_KERNEL_START is defined in arch/microblaze/config.in and used + * in arch/microblaze/Makefile. + */ +#define PAGE_OFFSET CONFIG_KERNEL_START +/* + * MAP_NR -- given an address, calculate the index of the page struct which + * points to the address's page. + */ +#define MAP_NR(addr) (((unsigned long)(addr) - PAGE_OFFSET) >> PAGE_SHIFT) -#define clear_user_page(pgaddr, vaddr, page) memset((pgaddr), 0, PAGE_SIZE) -#define copy_user_page(vto, vfrom, vaddr, topg) \ +/* + * The basic type of a PTE - 32 bit physical addressing. + */ +typedef unsigned long pte_basic_t; +#define PTE_SHIFT (PAGE_SHIFT - 2) /* 1024 ptes per page */ +#define PTE_FMT "%.8lx" + +#endif /* CONFIG_MMU */ + +# ifndef CONFIG_MMU +# define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) +# define get_user_page(vaddr) __get_free_page(GFP_KERNEL) +# define free_user_page(page, addr) free_page(addr) +# else /* CONFIG_MMU */ +extern void copy_page(void *to, void *from); +# endif /* CONFIG_MMU */ + +# define clear_page(pgaddr) memset((pgaddr), 0, PAGE_SIZE) + +# define clear_user_page(pgaddr, vaddr, page) memset((pgaddr), 0, PAGE_SIZE) +# define copy_user_page(vto, vfrom, vaddr, topg) \ memcpy((vto), (vfrom), PAGE_SIZE) /* @@ -60,21 +93,32 @@ extern unsigned int __page_offset; typedef struct page *pgtable_t; typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pgprot; } pgprot_t; +/* FIXME this can depend on linux kernel version */ +# ifdef CONFIG_MMU +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pgd; } pgd_t; +# else /* CONFIG_MMU */ typedef struct { unsigned long ste[64]; } pmd_t; typedef struct { pmd_t pue[1]; } pud_t; typedef struct { pud_t pge[1]; } pgd_t; +# endif /* CONFIG_MMU */ +# define pte_val(x) ((x).pte) +# define pgprot_val(x) ((x).pgprot) -#define pte_val(x) ((x).pte) -#define pgprot_val(x) ((x).pgprot) -#define pmd_val(x) ((x).ste[0]) -#define pud_val(x) ((x).pue[0]) -#define pgd_val(x) ((x).pge[0]) +# ifdef CONFIG_MMU +# define pmd_val(x) ((x).pmd) +# define pgd_val(x) ((x).pgd) +# else /* CONFIG_MMU */ +# define pmd_val(x) ((x).ste[0]) +# define pud_val(x) ((x).pue[0]) +# define pgd_val(x) ((x).pge[0]) +# endif /* CONFIG_MMU */ -#define __pte(x) ((pte_t) { (x) }) -#define __pmd(x) ((pmd_t) { (x) }) -#define __pgd(x) ((pgd_t) { (x) }) -#define __pgprot(x) ((pgprot_t) { (x) }) +# define __pte(x) ((pte_t) { (x) }) +# define __pmd(x) ((pmd_t) { (x) }) +# define __pgd(x) ((pgd_t) { (x) }) +# define __pgprot(x) ((pgprot_t) { (x) }) /** * Conversions for virtual address, physical address, pfn, and struct @@ -94,44 +138,80 @@ extern unsigned long max_low_pfn; extern unsigned long min_low_pfn; extern unsigned long max_pfn; -#define __pa(vaddr) ((unsigned long) (vaddr)) -#define __va(paddr) ((void *) (paddr)) +extern unsigned long memory_start; +extern unsigned long memory_end; +extern unsigned long memory_size; -#define phys_to_pfn(phys) (PFN_DOWN(phys)) -#define pfn_to_phys(pfn) (PFN_PHYS(pfn)) +extern int page_is_ram(unsigned long pfn); -#define virt_to_pfn(vaddr) (phys_to_pfn((__pa(vaddr)))) -#define pfn_to_virt(pfn) __va(pfn_to_phys((pfn))) +# define phys_to_pfn(phys) (PFN_DOWN(phys)) +# define pfn_to_phys(pfn) (PFN_PHYS(pfn)) -#define virt_to_page(vaddr) (pfn_to_page(virt_to_pfn(vaddr))) -#define page_to_virt(page) (pfn_to_virt(page_to_pfn(page))) +# define virt_to_pfn(vaddr) (phys_to_pfn((__pa(vaddr)))) +# define pfn_to_virt(pfn) __va(pfn_to_phys((pfn))) -#define page_to_phys(page) (pfn_to_phys(page_to_pfn(page))) -#define page_to_bus(page) (page_to_phys(page)) -#define phys_to_page(paddr) (pfn_to_page(phys_to_pfn(paddr))) +# ifdef CONFIG_MMU +# define virt_to_page(kaddr) (mem_map + MAP_NR(kaddr)) +# else /* CONFIG_MMU */ +# define virt_to_page(vaddr) (pfn_to_page(virt_to_pfn(vaddr))) +# define page_to_virt(page) (pfn_to_virt(page_to_pfn(page))) +# define page_to_phys(page) (pfn_to_phys(page_to_pfn(page))) +# define page_to_bus(page) (page_to_phys(page)) +# define phys_to_page(paddr) (pfn_to_page(phys_to_pfn(paddr))) +# endif /* CONFIG_MMU */ -extern unsigned int memory_start; -extern unsigned int memory_end; -extern unsigned int memory_size; +# ifndef CONFIG_MMU +# define pfn_valid(pfn) ((pfn) >= min_low_pfn && (pfn) <= max_mapnr) +# define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) +# else /* CONFIG_MMU */ +# define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT) +# define pfn_valid(pfn) ((pfn) < (max_mapnr + ARCH_PFN_OFFSET)) +# define VALID_PAGE(page) ((page - mem_map) < max_mapnr) +# endif /* CONFIG_MMU */ -#define pfn_valid(pfn) ((pfn) >= min_low_pfn && (pfn) < max_mapnr) +# endif /* __ASSEMBLY__ */ -#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) +#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) -#else -#define tophys(rd, rs) (addik rd, rs, 0) -#define tovirt(rd, rs) (addik rd, rs, 0) -#endif /* __ASSEMBLY__ */ -#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) +# ifndef CONFIG_MMU +# define __pa(vaddr) ((unsigned long) (vaddr)) +# define __va(paddr) ((void *) (paddr)) +# else /* CONFIG_MMU */ +# define __pa(x) __virt_to_phys((unsigned long)(x)) +# define __va(x) ((void *)__phys_to_virt((unsigned long)(x))) +# endif /* CONFIG_MMU */ + -/* Convert between virtual and physical address for MMU. */ -/* Handle MicroBlaze processor with virtual memory. */ +/* Convert between virtual and physical address for MMU. */ +/* Handle MicroBlaze processor with virtual memory. */ +#ifndef CONFIG_MMU #define __virt_to_phys(addr) addr #define __phys_to_virt(addr) addr +#define tophys(rd, rs) addik rd, rs, 0 +#define tovirt(rd, rs) addik rd, rs, 0 +#else +#define __virt_to_phys(addr) \ + ((addr) + CONFIG_KERNEL_BASE_ADDR - CONFIG_KERNEL_START) +#define __phys_to_virt(addr) \ + ((addr) + CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR) +#define tophys(rd, rs) \ + addik rd, rs, (CONFIG_KERNEL_BASE_ADDR - CONFIG_KERNEL_START) +#define tovirt(rd, rs) \ + addik rd, rs, (CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR) +#endif /* CONFIG_MMU */ #define TOPHYS(addr) __virt_to_phys(addr) +#ifdef CONFIG_MMU +#ifdef CONFIG_CONTIGUOUS_PAGE_ALLOC +#define WANT_PAGE_VIRTUAL 1 /* page alloc 2 relies on this */ +#endif + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#endif /* CONFIG_MMU */ + #endif /* __KERNEL__ */ #include diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index 2a4b35484010..59a757e46ba5 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -1,4 +1,6 @@ /* + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -9,6 +11,195 @@ #ifndef _ASM_MICROBLAZE_PGALLOC_H #define _ASM_MICROBLAZE_PGALLOC_H +#ifdef CONFIG_MMU + +#include /* For min/max macros */ +#include +#include +#include +#include +#include + +#define PGDIR_ORDER 0 + +/* + * This is handled very differently on MicroBlaze since out page tables + * are all 0's and I want to be able to use these zero'd pages elsewhere + * as well - it gives us quite a speedup. + * -- Cort + */ +extern struct pgtable_cache_struct { + unsigned long *pgd_cache; + unsigned long *pte_cache; + unsigned long pgtable_cache_sz; +} quicklists; + +#define pgd_quicklist (quicklists.pgd_cache) +#define pmd_quicklist ((unsigned long *)0) +#define pte_quicklist (quicklists.pte_cache) +#define pgtable_cache_size (quicklists.pgtable_cache_sz) + +extern unsigned long *zero_cache; /* head linked list of pre-zero'd pages */ +extern atomic_t zero_sz; /* # currently pre-zero'd pages */ +extern atomic_t zeropage_hits; /* # zero'd pages request that we've done */ +extern atomic_t zeropage_calls; /* # zero'd pages request that've been made */ +extern atomic_t zerototal; /* # pages zero'd over time */ + +#define zero_quicklist (zero_cache) +#define zero_cache_sz (zero_sz) +#define zero_cache_calls (zeropage_calls) +#define zero_cache_hits (zeropage_hits) +#define zero_cache_total (zerototal) + +/* + * return a pre-zero'd page from the list, + * return NULL if none available -- Cort + */ +extern unsigned long get_zero_page_fast(void); + +extern void __bad_pte(pmd_t *pmd); + +extern inline pgd_t *get_pgd_slow(void) +{ + pgd_t *ret; + + ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGDIR_ORDER); + if (ret != NULL) + clear_page(ret); + return ret; +} + +extern inline pgd_t *get_pgd_fast(void) +{ + unsigned long *ret; + + ret = pgd_quicklist; + if (ret != NULL) { + pgd_quicklist = (unsigned long *)(*ret); + ret[0] = 0; + pgtable_cache_size--; + } else + ret = (unsigned long *)get_pgd_slow(); + return (pgd_t *)ret; +} + +extern inline void free_pgd_fast(pgd_t *pgd) +{ + *(unsigned long **)pgd = pgd_quicklist; + pgd_quicklist = (unsigned long *) pgd; + pgtable_cache_size++; +} + +extern inline void free_pgd_slow(pgd_t *pgd) +{ + free_page((unsigned long)pgd); +} + +#define pgd_free(mm, pgd) free_pgd_fast(pgd) +#define pgd_alloc(mm) get_pgd_fast() + +#define pmd_pgtable(pmd) pmd_page(pmd) + +/* + * We don't have any real pmd's, and this code never triggers because + * the pgd will always be present.. + */ +#define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); }) +#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); }) +/* FIXME two definition - look below */ +#define pmd_free(mm, x) do { } while (0) +#define pgd_populate(mm, pmd, pte) BUG() + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) +{ + pte_t *pte; + extern int mem_init_done; + extern void *early_get_page(void); + if (mem_init_done) { + pte = (pte_t *)__get_free_page(GFP_KERNEL | + __GFP_REPEAT | __GFP_ZERO); + } else { + pte = (pte_t *)early_get_page(); + if (pte) + clear_page(pte); + } + return pte; +} + +static inline struct page *pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + struct page *ptepage; + +#ifdef CONFIG_HIGHPTE + int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; +#else + int flags = GFP_KERNEL | __GFP_REPEAT; +#endif + + ptepage = alloc_pages(flags, 0); + if (ptepage) + clear_highpage(ptepage); + return ptepage; +} + +static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, + unsigned long address) +{ + unsigned long *ret; + + ret = pte_quicklist; + if (ret != NULL) { + pte_quicklist = (unsigned long *)(*ret); + ret[0] = 0; + pgtable_cache_size--; + } + return (pte_t *)ret; +} + +extern inline void pte_free_fast(pte_t *pte) +{ + *(unsigned long **)pte = pte_quicklist; + pte_quicklist = (unsigned long *) pte; + pgtable_cache_size++; +} + +extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + free_page((unsigned long)pte); +} + +extern inline void pte_free_slow(struct page *ptepage) +{ + __free_page(ptepage); +} + +extern inline void pte_free(struct mm_struct *mm, struct page *ptepage) +{ + __free_page(ptepage); +} + +#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte)) + +#define pmd_populate(mm, pmd, pte) (pmd_val(*(pmd)) = page_address(pte)) + +#define pmd_populate_kernel(mm, pmd, pte) \ + (pmd_val(*(pmd)) = (unsigned long) (pte)) + +/* + * We don't have any real pmd's, and this code never triggers because + * the pgd will always be present.. + */ +#define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); }) +/*#define pmd_free(mm, x) do { } while (0)*/ +#define __pmd_free_tlb(tlb, x) do { } while (0) +#define pgd_populate(mm, pmd, pte) BUG() + +extern int do_check_pgt_cache(int, int); + +#endif /* CONFIG_MMU */ + #define check_pgt_cache() do {} while (0) #endif /* _ASM_MICROBLAZE_PGALLOC_H */ -- cgit v1.2.2 From 5233806dfe6f88fb1a01db3729eeda78f65bcbd1 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:18 +0200 Subject: microblaze_mmu_v2: Update process creation for MMU Signed-off-by: Michal Simek --- arch/microblaze/include/asm/processor.h | 95 +++++++++++++++++++++++++++++++-- arch/microblaze/include/asm/registers.h | 21 +++++++- arch/microblaze/include/asm/segment.h | 20 ++++--- arch/microblaze/kernel/process.c | 58 ++++++++++++++++++++ 4 files changed, 181 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 9329029d2614..563c6b9453f0 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -1,6 +1,6 @@ /* - * Copyright (C) 2008 Michal Simek - * Copyright (C) 2008 PetaLogix + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -26,14 +26,15 @@ extern const struct seq_operations cpuinfo_op; # define cpu_sleep() do {} while (0) # define prepare_to_copy(tsk) do {} while (0) -# endif /* __ASSEMBLY__ */ - #define task_pt_regs(tsk) \ (((struct pt_regs *)(THREAD_SIZE + task_stack_page(tsk))) - 1) /* Do necessary setup to start up a newly executed thread. */ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp); +# endif /* __ASSEMBLY__ */ + +# ifndef CONFIG_MMU /* * User space process size: memory size * @@ -85,4 +86,90 @@ extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); # define KSTK_EIP(tsk) (0) # define KSTK_ESP(tsk) (0) +# else /* CONFIG_MMU */ + +/* + * This is used to define STACK_TOP, and with MMU it must be below + * kernel base to select the correct PGD when handling MMU exceptions. + */ +# define TASK_SIZE (CONFIG_KERNEL_START) + +/* + * This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +# define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3) + +# define THREAD_KSP 0 + +# ifndef __ASSEMBLY__ + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +# define current_text_addr() ({ __label__ _l; _l: &&_l; }) + +/* If you change this, you must change the associated assembly-languages + * constants defined below, THREAD_*. + */ +struct thread_struct { + /* kernel stack pointer (must be first field in structure) */ + unsigned long ksp; + unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ + void *pgdir; /* root of page-table tree */ + struct pt_regs *regs; /* Pointer to saved register state */ +}; + +# define INIT_THREAD { \ + .ksp = sizeof init_stack + (unsigned long)init_stack, \ + .pgdir = swapper_pg_dir, \ +} + +/* Do necessary setup to start up a newly executed thread. */ +void start_thread(struct pt_regs *regs, + unsigned long pc, unsigned long usp); + +/* Free all resources held by a thread. */ +extern inline void release_thread(struct task_struct *dead_task) +{ +} + +extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +/* Free current thread data structures etc. */ +static inline void exit_thread(void) +{ +} + +/* Return saved (kernel) PC of a blocked thread. */ +# define thread_saved_pc(tsk) \ + ((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0) + +unsigned long get_wchan(struct task_struct *p); + +/* The size allocated for kernel stacks. This _must_ be a power of two! */ +# define KERNEL_STACK_SIZE 0x2000 + +/* Return some info about the user process TASK. */ +# define task_tos(task) ((unsigned long)(task) + KERNEL_STACK_SIZE) +# define task_regs(task) ((struct pt_regs *)task_tos(task) - 1) + +# define task_pt_regs_plus_args(tsk) \ + (((void *)task_pt_regs(tsk)) - STATE_SAVE_ARG_SPACE) + +# define task_sp(task) (task_regs(task)->r1) +# define task_pc(task) (task_regs(task)->pc) +/* Grotty old names for some. */ +# define KSTK_EIP(task) (task_pc(task)) +# define KSTK_ESP(task) (task_sp(task)) + +/* FIXME */ +# define deactivate_mm(tsk, mm) do { } while (0) + +# define STACK_TOP TASK_SIZE +# define STACK_TOP_MAX STACK_TOP + +# endif /* __ASSEMBLY__ */ +# endif /* CONFIG_MMU */ #endif /* _ASM_MICROBLAZE_PROCESSOR_H */ diff --git a/arch/microblaze/include/asm/registers.h b/arch/microblaze/include/asm/registers.h index 834142d9356f..68c3afb73877 100644 --- a/arch/microblaze/include/asm/registers.h +++ b/arch/microblaze/include/asm/registers.h @@ -1,6 +1,6 @@ /* - * Copyright (C) 2008 Michal Simek - * Copyright (C) 2008 PetaLogix + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -30,4 +30,21 @@ #define FSR_UF (1<<1) /* Underflow */ #define FSR_DO (1<<0) /* Denormalized operand error */ +# ifdef CONFIG_MMU +/* Machine State Register (MSR) Fields */ +# define MSR_UM (1<<11) /* User Mode */ +# define MSR_UMS (1<<12) /* User Mode Save */ +# define MSR_VM (1<<13) /* Virtual Mode */ +# define MSR_VMS (1<<14) /* Virtual Mode Save */ + +# define MSR_KERNEL (MSR_EE | MSR_VM) +/* # define MSR_USER (MSR_KERNEL | MSR_UM | MSR_IE) */ +# define MSR_KERNEL_VMS (MSR_EE | MSR_VMS) +/* # define MSR_USER_VMS (MSR_KERNEL_VMS | MSR_UMS | MSR_IE) */ + +/* Exception State Register (ESR) Fields */ +# define ESR_DIZ (1<<11) /* Zone Protection */ +# define ESR_S (1<<10) /* Store instruction */ + +# endif /* CONFIG_MMU */ #endif /* _ASM_MICROBLAZE_REGISTERS_H */ diff --git a/arch/microblaze/include/asm/segment.h b/arch/microblaze/include/asm/segment.h index 7f5dcc56eea1..0e7102c3fb11 100644 --- a/arch/microblaze/include/asm/segment.h +++ b/arch/microblaze/include/asm/segment.h @@ -1,6 +1,6 @@ /* - * Copyright (C) 2008 Michal Simek - * Copyright (C) 2008 PetaLogix + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -11,7 +11,7 @@ #ifndef _ASM_MICROBLAZE_SEGMENT_H #define _ASM_MICROBLAZE_SEGMENT_H -#ifndef __ASSEMBLY__ +# ifndef __ASSEMBLY__ typedef struct { unsigned long seg; @@ -29,15 +29,21 @@ typedef struct { * * For non-MMU arch like Microblaze, KERNEL_DS and USER_DS is equal. */ -# define KERNEL_DS ((mm_segment_t){0}) +# define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + +# ifndef CONFIG_MMU +# define KERNEL_DS MAKE_MM_SEG(0) # define USER_DS KERNEL_DS +# else +# define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) +# define USER_DS MAKE_MM_SEG(TASK_SIZE - 1) +# endif # define get_ds() (KERNEL_DS) # define get_fs() (current_thread_info()->addr_limit) -# define set_fs(x) \ - do { current_thread_info()->addr_limit = (x); } while (0) +# define set_fs(val) (current_thread_info()->addr_limit = (val)) -# define segment_eq(a, b) ((a).seg == (b).seg) +# define segment_eq(a, b) ((a).seg == (b).seg) # endif /* __ASSEMBLY__ */ #endif /* _ASM_MICROBLAZE_SEGMENT_H */ diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 685ad71ced50..00b12c6d5326 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -126,9 +126,54 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, else childregs->r1 = ((unsigned long) ti) + THREAD_SIZE; +#ifndef CONFIG_MMU memset(&ti->cpu_context, 0, sizeof(struct cpu_context)); ti->cpu_context.r1 = (unsigned long)childregs; ti->cpu_context.msr = (unsigned long)childregs->msr; +#else + + /* if creating a kernel thread then update the current reg (we don't + * want to use the parent's value when restoring by POP_STATE) */ + if (kernel_mode(regs)) + /* save new current on stack to use POP_STATE */ + childregs->CURRENT_TASK = (unsigned long)p; + /* if returning to user then use the parent's value of this register */ + + /* if we're creating a new kernel thread then just zeroing all + * the registers. That's OK for a brand new thread.*/ + /* Pls. note that some of them will be restored in POP_STATE */ + if (kernel_mode(regs)) + memset(&ti->cpu_context, 0, sizeof(struct cpu_context)); + /* if this thread is created for fork/vfork/clone, then we want to + * restore all the parent's context */ + /* in addition to the registers which will be restored by POP_STATE */ + else { + ti->cpu_context = *(struct cpu_context *)regs; + childregs->msr |= MSR_UMS; + } + + /* FIXME STATE_SAVE_PT_OFFSET; */ + ti->cpu_context.r1 = (unsigned long)childregs - STATE_SAVE_ARG_SPACE; + /* we should consider the fact that childregs is a copy of the parent + * regs which were saved immediately after entering the kernel state + * before enabling VM. This MSR will be restored in switch_to and + * RETURN() and we want to have the right machine state there + * specifically this state must have INTs disabled before and enabled + * after performing rtbd + * compose the right MSR for RETURN(). It will work for switch_to also + * excepting for VM and UMS + * don't touch UMS , CARRY and cache bits + * right now MSR is a copy of parent one */ + childregs->msr |= MSR_BIP; + childregs->msr &= ~MSR_EIP; + childregs->msr |= MSR_IE; + childregs->msr &= ~MSR_VM; + childregs->msr |= MSR_VMS; + childregs->msr |= MSR_EE; /* exceptions will be enabled*/ + + ti->cpu_context.msr = (childregs->msr|MSR_VM); + ti->cpu_context.msr &= ~MSR_UMS; /* switch_to to kernel mode */ +#endif ti->cpu_context.r15 = (unsigned long)ret_from_fork - 8; if (clone_flags & CLONE_SETTLS) @@ -137,6 +182,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } +#ifndef CONFIG_MMU /* * Return saved PC of a blocked thread. */ @@ -151,6 +197,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk) else return ctx->r14; } +#endif static void kernel_thread_helper(int (*fn)(void *), void *arg) { @@ -189,3 +236,14 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp) regs->r1 = usp; regs->pt_mode = 0; } + +#ifdef CONFIG_MMU +#include +/* + * Set up a thread for executing a new program + */ +int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs) +{ + return 0; /* MicroBlaze has no separate FPU registers */ +} +#endif /* CONFIG_MMU */ -- cgit v1.2.2 From 45be7d46a9928c6b8ed747e020748500da7e66f1 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:18 +0200 Subject: microblaze_mmu_v2: Update tlb.h and tlbflush.h Signed-off-by: Michal Simek --- arch/microblaze/include/asm/tlb.h | 8 ++++++ arch/microblaze/include/asm/tlbflush.h | 48 ++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/include/asm/tlb.h b/arch/microblaze/include/asm/tlb.h index d1dfe3791127..c472d2801132 100644 --- a/arch/microblaze/include/asm/tlb.h +++ b/arch/microblaze/include/asm/tlb.h @@ -1,4 +1,6 @@ /* + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -13,4 +15,10 @@ #include +#ifdef CONFIG_MMU +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0) +#endif + #endif /* _ASM_MICROBLAZE_TLB_H */ diff --git a/arch/microblaze/include/asm/tlbflush.h b/arch/microblaze/include/asm/tlbflush.h index d7fe7629001b..eb31a0e8a772 100644 --- a/arch/microblaze/include/asm/tlbflush.h +++ b/arch/microblaze/include/asm/tlbflush.h @@ -1,4 +1,6 @@ /* + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -9,6 +11,50 @@ #ifndef _ASM_MICROBLAZE_TLBFLUSH_H #define _ASM_MICROBLAZE_TLBFLUSH_H +#ifdef CONFIG_MMU + +#include +#include +#include /* For TASK_SIZE */ +#include +#include +#include + +extern void _tlbie(unsigned long address); +extern void _tlbia(void); + +#define __tlbia() _tlbia() + +static inline void local_flush_tlb_all(void) + { __tlbia(); } +static inline void local_flush_tlb_mm(struct mm_struct *mm) + { __tlbia(); } +static inline void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) + { _tlbie(vmaddr); } +static inline void local_flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) + { __tlbia(); } + +#define flush_tlb_kernel_range(start, end) do { } while (0) + +#define update_mmu_cache(vma, addr, pte) do { } while (0) + +#define flush_tlb_all local_flush_tlb_all +#define flush_tlb_mm local_flush_tlb_mm +#define flush_tlb_page local_flush_tlb_page +#define flush_tlb_range local_flush_tlb_range + +/* + * This is called in munmap when we have freed up some page-table + * pages. We don't need to do anything here, there's nothing special + * about our page-table pages. -- paulus + */ +static inline void flush_tlb_pgtables(struct mm_struct *mm, + unsigned long start, unsigned long end) { } + +#else /* CONFIG_MMU */ + #define flush_tlb() BUG() #define flush_tlb_all() BUG() #define flush_tlb_mm(mm) BUG() @@ -17,4 +63,6 @@ #define flush_tlb_pgtables(mm, start, end) BUG() #define flush_tlb_kernel_range(start, end) BUG() +#endif /* CONFIG_MMU */ + #endif /* _ASM_MICROBLAZE_TLBFLUSH_H */ -- cgit v1.2.2 From 627cef44f4f4dfc22bebf3a68378bf3e3bedd21e Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:19 +0200 Subject: microblaze_mmu_v2: MMU asm offset update Signed-off-by: Michal Simek --- arch/microblaze/kernel/asm-offsets.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/asm-offsets.c b/arch/microblaze/kernel/asm-offsets.c index aabd9e9423a6..7bc7b68f97db 100644 --- a/arch/microblaze/kernel/asm-offsets.c +++ b/arch/microblaze/kernel/asm-offsets.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2007-2009 Michal Simek * Copyright (C) 2007-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * @@ -68,16 +69,26 @@ int main(int argc, char *argv[]) /* struct task_struct */ DEFINE(TS_THREAD_INFO, offsetof(struct task_struct, stack)); +#ifdef CONFIG_MMU + DEFINE(TASK_STATE, offsetof(struct task_struct, state)); + DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags)); + DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace)); + DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); + DEFINE(TASK_MM, offsetof(struct task_struct, mm)); + DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); + DEFINE(TASK_PID, offsetof(struct task_struct, pid)); + DEFINE(TASK_THREAD, offsetof(struct task_struct, thread)); + DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp)); + BLANK(); + + DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); + BLANK(); +#endif /* struct thread_info */ DEFINE(TI_TASK, offsetof(struct thread_info, task)); - DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain)); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); - DEFINE(TI_STATUS, offsetof(struct thread_info, status)); - DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); - DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); - DEFINE(TI_RESTART_BLOCK, offsetof(struct thread_info, restart_block)); DEFINE(TI_CPU_CONTEXT, offsetof(struct thread_info, cpu_context)); BLANK(); -- cgit v1.2.2 From 23cfc369337fa106d08cbed0dc86527c67966ff2 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:20 +0200 Subject: microblaze_mmu_v2: Add CURRENT_TASK for entry.S Signed-off-by: Michal Simek --- arch/microblaze/include/asm/current.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/include/asm/current.h b/arch/microblaze/include/asm/current.h index 8375ea991e26..29303ed825cc 100644 --- a/arch/microblaze/include/asm/current.h +++ b/arch/microblaze/include/asm/current.h @@ -1,4 +1,6 @@ /* + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -9,6 +11,12 @@ #ifndef _ASM_MICROBLAZE_CURRENT_H #define _ASM_MICROBLAZE_CURRENT_H +/* + * Register used to hold the current task pointer while in the kernel. + * Any `call clobbered' register without a special meaning should be OK, + * but check asm/microblaze/kernel/entry.S to be sure. + */ +#define CURRENT_TASK r31 # ifndef __ASSEMBLY__ /* * Dedicate r31 to keeping the current task pointer -- cgit v1.2.2 From ca54502bd52a5d483f7ba076b613ad2ee43941da Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:21 +0200 Subject: microblaze_mmu_v2: entry.S, entry.h Signed-off-by: Michal Simek --- arch/microblaze/include/asm/entry.h | 37 +- arch/microblaze/kernel/entry.S | 1116 +++++++++++++++++++++++++++++++++++ 2 files changed, 1151 insertions(+), 2 deletions(-) create mode 100644 arch/microblaze/kernel/entry.S (limited to 'arch') diff --git a/arch/microblaze/include/asm/entry.h b/arch/microblaze/include/asm/entry.h index e4c3aef884df..61abbd232640 100644 --- a/arch/microblaze/include/asm/entry.h +++ b/arch/microblaze/include/asm/entry.h @@ -1,8 +1,8 @@ /* * Definitions used by low-level trap handlers * - * Copyright (C) 2008 Michal Simek - * Copyright (C) 2007 - 2008 PetaLogix + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2007-2009 PetaLogix * Copyright (C) 2007 John Williams * * This file is subject to the terms and conditions of the GNU General @@ -31,7 +31,40 @@ DECLARE_PER_CPU(unsigned int, R11_SAVE); /* Temp variable for entry */ DECLARE_PER_CPU(unsigned int, CURRENT_SAVE); /* Saved current pointer */ # endif /* __ASSEMBLY__ */ +#ifndef CONFIG_MMU + /* noMMU hasn't any space for args */ # define STATE_SAVE_ARG_SPACE (0) +#else /* CONFIG_MMU */ + +/* If true, system calls save and restore all registers (except result + * registers, of course). If false, then `call clobbered' registers + * will not be preserved, on the theory that system calls are basically + * function calls anyway, and the caller should be able to deal with it. + * This is a security risk, of course, as `internal' values may leak out + * after a system call, but that certainly doesn't matter very much for + * a processor with no MMU protection! For a protected-mode kernel, it + * would be faster to just zero those registers before returning. + * + * I can not rely on the glibc implementation. If you turn it off make + * sure that r11/r12 is saved in user-space. --KAA + * + * These are special variables using by the kernel trap/interrupt code + * to save registers in, at a time when there are no spare registers we + * can use to do so, and we can't depend on the value of the stack + * pointer. This means that they must be within a signed 16-bit + * displacement of 0x00000000. + */ + +/* A `state save frame' is a struct pt_regs preceded by some extra space + * suitable for a function call stack frame. */ + +/* Amount of room on the stack reserved for arguments and to satisfy the + * C calling conventions, in addition to the space used by the struct + * pt_regs that actually holds saved values. */ +#define STATE_SAVE_ARG_SPACE (6*4) /* Up to six arguments */ + +#endif /* CONFIG_MMU */ + #endif /* _ASM_MICROBLAZE_ENTRY_H */ diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S new file mode 100644 index 000000000000..91a0e7b185dd --- /dev/null +++ b/arch/microblaze/kernel/entry.S @@ -0,0 +1,1116 @@ +/* + * Low-level system-call handling, trap handlers and context-switching + * + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix + * Copyright (C) 2003 John Williams + * Copyright (C) 2001,2002 NEC Corporation + * Copyright (C) 2001,2002 Miles Bader + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of this + * archive for more details. + * + * Written by Miles Bader + * Heavily modified by John Williams for Microblaze + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +/* The size of a state save frame. */ +#define STATE_SAVE_SIZE (PT_SIZE + STATE_SAVE_ARG_SPACE) + +/* The offset of the struct pt_regs in a `state save frame' on the stack. */ +#define PTO STATE_SAVE_ARG_SPACE /* 24 the space for args */ + +#define C_ENTRY(name) .globl name; .align 4; name + +/* + * Various ways of setting and clearing BIP in flags reg. + * This is mucky, but necessary using microblaze version that + * allows msr ops to write to BIP + */ +#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR + .macro clear_bip + msrclr r11, MSR_BIP + nop + .endm + + .macro set_bip + msrset r11, MSR_BIP + nop + .endm + + .macro clear_eip + msrclr r11, MSR_EIP + nop + .endm + + .macro set_ee + msrset r11, MSR_EE + nop + .endm + + .macro disable_irq + msrclr r11, MSR_IE + nop + .endm + + .macro enable_irq + msrset r11, MSR_IE + nop + .endm + + .macro set_ums + msrset r11, MSR_UMS + nop + msrclr r11, MSR_VMS + nop + .endm + + .macro set_vms + msrclr r11, MSR_UMS + nop + msrset r11, MSR_VMS + nop + .endm + + .macro clear_vms_ums + msrclr r11, MSR_VMS + nop + msrclr r11, MSR_UMS + nop + .endm +#else + .macro clear_bip + mfs r11, rmsr + nop + andi r11, r11, ~MSR_BIP + mts rmsr, r11 + nop + .endm + + .macro set_bip + mfs r11, rmsr + nop + ori r11, r11, MSR_BIP + mts rmsr, r11 + nop + .endm + + .macro clear_eip + mfs r11, rmsr + nop + andi r11, r11, ~MSR_EIP + mts rmsr, r11 + nop + .endm + + .macro set_ee + mfs r11, rmsr + nop + ori r11, r11, MSR_EE + mts rmsr, r11 + nop + .endm + + .macro disable_irq + mfs r11, rmsr + nop + andi r11, r11, ~MSR_IE + mts rmsr, r11 + nop + .endm + + .macro enable_irq + mfs r11, rmsr + nop + ori r11, r11, MSR_IE + mts rmsr, r11 + nop + .endm + + .macro set_ums + mfs r11, rmsr + nop + ori r11, r11, MSR_VMS + andni r11, r11, MSR_UMS + mts rmsr, r11 + nop + .endm + + .macro set_vms + mfs r11, rmsr + nop + ori r11, r11, MSR_VMS + andni r11, r11, MSR_UMS + mts rmsr, r11 + nop + .endm + + .macro clear_vms_ums + mfs r11, rmsr + nop + andni r11, r11, (MSR_VMS|MSR_UMS) + mts rmsr,r11 + nop + .endm +#endif + +/* Define how to call high-level functions. With MMU, virtual mode must be + * enabled when calling the high-level function. Clobbers R11. + * VM_ON, VM_OFF, DO_JUMP_BIPCLR, DO_CALL + */ + +/* turn on virtual protected mode save */ +#define VM_ON \ + set_ums; \ + rted r0, 2f; \ +2: nop; + +/* turn off virtual protected mode save and user mode save*/ +#define VM_OFF \ + clear_vms_ums; \ + rted r0, TOPHYS(1f); \ +1: nop; + +#define SAVE_REGS \ + swi r2, r1, PTO+PT_R2; /* Save SDA */ \ + swi r5, r1, PTO+PT_R5; \ + swi r6, r1, PTO+PT_R6; \ + swi r7, r1, PTO+PT_R7; \ + swi r8, r1, PTO+PT_R8; \ + swi r9, r1, PTO+PT_R9; \ + swi r10, r1, PTO+PT_R10; \ + swi r11, r1, PTO+PT_R11; /* save clobbered regs after rval */\ + swi r12, r1, PTO+PT_R12; \ + swi r13, r1, PTO+PT_R13; /* Save SDA2 */ \ + swi r14, r1, PTO+PT_PC; /* PC, before IRQ/trap */ \ + swi r15, r1, PTO+PT_R15; /* Save LP */ \ + swi r18, r1, PTO+PT_R18; /* Save asm scratch reg */ \ + swi r19, r1, PTO+PT_R19; \ + swi r20, r1, PTO+PT_R20; \ + swi r21, r1, PTO+PT_R21; \ + swi r22, r1, PTO+PT_R22; \ + swi r23, r1, PTO+PT_R23; \ + swi r24, r1, PTO+PT_R24; \ + swi r25, r1, PTO+PT_R25; \ + swi r26, r1, PTO+PT_R26; \ + swi r27, r1, PTO+PT_R27; \ + swi r28, r1, PTO+PT_R28; \ + swi r29, r1, PTO+PT_R29; \ + swi r30, r1, PTO+PT_R30; \ + swi r31, r1, PTO+PT_R31; /* Save current task reg */ \ + mfs r11, rmsr; /* save MSR */ \ + nop; \ + swi r11, r1, PTO+PT_MSR; + +#define RESTORE_REGS \ + lwi r11, r1, PTO+PT_MSR; \ + mts rmsr , r11; \ + nop; \ + lwi r2, r1, PTO+PT_R2; /* restore SDA */ \ + lwi r5, r1, PTO+PT_R5; \ + lwi r6, r1, PTO+PT_R6; \ + lwi r7, r1, PTO+PT_R7; \ + lwi r8, r1, PTO+PT_R8; \ + lwi r9, r1, PTO+PT_R9; \ + lwi r10, r1, PTO+PT_R10; \ + lwi r11, r1, PTO+PT_R11; /* restore clobbered regs after rval */\ + lwi r12, r1, PTO+PT_R12; \ + lwi r13, r1, PTO+PT_R13; /* restore SDA2 */ \ + lwi r14, r1, PTO+PT_PC; /* RESTORE_LINK PC, before IRQ/trap */\ + lwi r15, r1, PTO+PT_R15; /* restore LP */ \ + lwi r18, r1, PTO+PT_R18; /* restore asm scratch reg */ \ + lwi r19, r1, PTO+PT_R19; \ + lwi r20, r1, PTO+PT_R20; \ + lwi r21, r1, PTO+PT_R21; \ + lwi r22, r1, PTO+PT_R22; \ + lwi r23, r1, PTO+PT_R23; \ + lwi r24, r1, PTO+PT_R24; \ + lwi r25, r1, PTO+PT_R25; \ + lwi r26, r1, PTO+PT_R26; \ + lwi r27, r1, PTO+PT_R27; \ + lwi r28, r1, PTO+PT_R28; \ + lwi r29, r1, PTO+PT_R29; \ + lwi r30, r1, PTO+PT_R30; \ + lwi r31, r1, PTO+PT_R31; /* Restore cur task reg */ + +.text + +/* + * User trap. + * + * System calls are handled here. + * + * Syscall protocol: + * Syscall number in r12, args in r5-r10 + * Return value in r3 + * + * Trap entered via brki instruction, so BIP bit is set, and interrupts + * are masked. This is nice, means we don't have to CLI before state save + */ +C_ENTRY(_user_exception): + swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */ + addi r14, r14, 4 /* return address is 4 byte after call */ + swi r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* Save r11 */ + + lwi r11, r0, TOPHYS(PER_CPU(KM));/* See if already in kernel mode.*/ + beqi r11, 1f; /* Jump ahead if coming from user */ +/* Kernel-mode state save. */ + lwi r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* Reload kernel stack-ptr*/ + tophys(r1,r11); + swi r11, r1, (PT_R1-PT_SIZE); /* Save original SP. */ + lwi r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */ + + addik r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */ + SAVE_REGS + + addi r11, r0, 1; /* Was in kernel-mode. */ + swi r11, r1, PTO+PT_MODE; /* pt_regs -> kernel mode */ + brid 2f; + nop; /* Fill delay slot */ + +/* User-mode state save. */ +1: + lwi r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */ + lwi r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */ + tophys(r1,r1); + lwi r1, r1, TS_THREAD_INFO; /* get stack from task_struct */ +/* calculate kernel stack pointer from task struct 8k */ + addik r1, r1, THREAD_SIZE; + tophys(r1,r1); + + addik r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */ + SAVE_REGS + + swi r0, r1, PTO+PT_MODE; /* Was in user-mode. */ + lwi r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); + swi r11, r1, PTO+PT_R1; /* Store user SP. */ + addi r11, r0, 1; + swi r11, r0, TOPHYS(PER_CPU(KM)); /* Now we're in kernel-mode. */ +2: lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */ + /* Save away the syscall number. */ + swi r12, r1, PTO+PT_R0; + tovirt(r1,r1) + + la r15, r0, ret_from_trap-8 +/* where the trap should return need -8 to adjust for rtsd r15, 8*/ +/* Jump to the appropriate function for the system call number in r12 + * (r12 is not preserved), or return an error if r12 is not valid. The LP + * register should point to the location where + * the called function should return. [note that MAKE_SYS_CALL uses label 1] */ + /* See if the system call number is valid. */ + addi r11, r12, -__NR_syscalls; + bgei r11,1f; + /* Figure out which function to use for this system call. */ + /* Note Microblaze barrel shift is optional, so don't rely on it */ + add r12, r12, r12; /* convert num -> ptr */ + add r12, r12, r12; + + /* Trac syscalls and stored them to r0_ram */ + lwi r3, r12, 0x400 + TOPHYS(r0_ram) + addi r3, r3, 1 + swi r3, r12, 0x400 + TOPHYS(r0_ram) + + lwi r12, r12, TOPHYS(sys_call_table); /* Function ptr */ + /* Make the system call. to r12*/ + set_vms; + rtid r12, 0; + nop; + /* The syscall number is invalid, return an error. */ +1: VM_ON; /* RETURN() expects virtual mode*/ + addi r3, r0, -ENOSYS; + rtsd r15,8; /* looks like a normal subroutine return */ + or r0, r0, r0 + + +/* Entry point used to return from a syscall/trap. */ +/* We re-enable BIP bit before state restore */ +C_ENTRY(ret_from_trap): + set_bip; /* Ints masked for state restore*/ + lwi r11, r1, PTO+PT_MODE; +/* See if returning to kernel mode, if so, skip resched &c. */ + bnei r11, 2f; + + /* We're returning to user mode, so check for various conditions that + * trigger rescheduling. */ + /* Get current task ptr into r11 */ + add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + lwi r11, r11, TS_THREAD_INFO; /* get thread info */ + lwi r11, r11, TI_FLAGS; /* get flags in thread info */ + andi r11, r11, _TIF_NEED_RESCHED; + beqi r11, 5f; + + swi r3, r1, PTO + PT_R3; /* store syscall result */ + swi r4, r1, PTO + PT_R4; + bralid r15, schedule; /* Call scheduler */ + nop; /* delay slot */ + lwi r3, r1, PTO + PT_R3; /* restore syscall result */ + lwi r4, r1, PTO + PT_R4; + + /* Maybe handle a signal */ +5: add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + lwi r11, r11, TS_THREAD_INFO; /* get thread info */ + lwi r11, r11, TI_FLAGS; /* get flags in thread info */ + andi r11, r11, _TIF_SIGPENDING; + beqi r11, 1f; /* Signals to handle, handle them */ + + swi r3, r1, PTO + PT_R3; /* store syscall result */ + swi r4, r1, PTO + PT_R4; + la r5, r1, PTO; /* Arg 1: struct pt_regs *regs */ + add r6, r0, r0; /* Arg 2: sigset_t *oldset */ + addi r7, r0, 1; /* Arg 3: int in_syscall */ + bralid r15, do_signal; /* Handle any signals */ + nop; + lwi r3, r1, PTO + PT_R3; /* restore syscall result */ + lwi r4, r1, PTO + PT_R4; + +/* Finally, return to user state. */ +1: swi r0, r0, PER_CPU(KM); /* Now officially in user state. */ + add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + swi r11, r0, PER_CPU(CURRENT_SAVE); /* save current */ + VM_OFF; + tophys(r1,r1); + RESTORE_REGS; + addik r1, r1, STATE_SAVE_SIZE /* Clean up stack space. */ + lwi r1, r1, PT_R1 - PT_SIZE;/* Restore user stack pointer. */ + bri 6f; + +/* Return to kernel state. */ +2: VM_OFF; + tophys(r1,r1); + RESTORE_REGS; + addik r1, r1, STATE_SAVE_SIZE /* Clean up stack space. */ + tovirt(r1,r1); +6: +TRAP_return: /* Make global symbol for debugging */ + rtbd r14, 0; /* Instructions to return from an IRQ */ + nop; + + +/* These syscalls need access to the struct pt_regs on the stack, so we + implement them in assembly (they're basically all wrappers anyway). */ + +C_ENTRY(sys_fork_wrapper): + addi r5, r0, SIGCHLD /* Arg 0: flags */ + lwi r6, r1, PTO+PT_R1 /* Arg 1: child SP (use parent's) */ + la r7, r1, PTO /* Arg 2: parent context */ + add r8. r0, r0 /* Arg 3: (unused) */ + add r9, r0, r0; /* Arg 4: (unused) */ + add r10, r0, r0; /* Arg 5: (unused) */ + brid do_fork /* Do real work (tail-call) */ + nop; + +/* This the initial entry point for a new child thread, with an appropriate + stack in place that makes it look the the child is in the middle of an + syscall. This function is actually `returned to' from switch_thread + (copy_thread makes ret_from_fork the return address in each new thread's + saved context). */ +C_ENTRY(ret_from_fork): + bralid r15, schedule_tail; /* ...which is schedule_tail's arg */ + add r3, r5, r0; /* switch_thread returns the prev task */ + /* ( in the delay slot ) */ + add r3, r0, r0; /* Child's fork call should return 0. */ + brid ret_from_trap; /* Do normal trap return */ + nop; + +C_ENTRY(sys_vfork_wrapper): + la r5, r1, PTO + brid sys_vfork /* Do real work (tail-call) */ + nop + +C_ENTRY(sys_clone_wrapper): + bnei r6, 1f; /* See if child SP arg (arg 1) is 0. */ + lwi r6, r1, PTO+PT_R1; /* If so, use paret's stack ptr */ +1: la r7, r1, PTO; /* Arg 2: parent context */ + add r8, r0, r0; /* Arg 3: (unused) */ + add r9, r0, r0; /* Arg 4: (unused) */ + add r10, r0, r0; /* Arg 5: (unused) */ + brid do_fork /* Do real work (tail-call) */ + nop; + +C_ENTRY(sys_execve_wrapper): + la r8, r1, PTO; /* add user context as 4th arg */ + brid sys_execve; /* Do real work (tail-call).*/ + nop; + +C_ENTRY(sys_sigsuspend_wrapper): + swi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + swi r4, r1, PTO+PT_R4; + la r6, r1, PTO; /* add user context as 2nd arg */ + bralid r15, sys_sigsuspend; /* Do real work.*/ + nop; + lwi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + lwi r4, r1, PTO+PT_R4; + bri ret_from_trap /* fall through will not work here due to align */ + nop; + +C_ENTRY(sys_rt_sigsuspend_wrapper): + swi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + swi r4, r1, PTO+PT_R4; + la r7, r1, PTO; /* add user context as 3rd arg */ + brlid r15, sys_rt_sigsuspend; /* Do real work.*/ + nop; + lwi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + lwi r4, r1, PTO+PT_R4; + bri ret_from_trap /* fall through will not work here due to align */ + nop; + + +C_ENTRY(sys_sigreturn_wrapper): + swi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + swi r4, r1, PTO+PT_R4; + la r5, r1, PTO; /* add user context as 1st arg */ + brlid r15, sys_sigreturn; /* Do real work.*/ + nop; + lwi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + lwi r4, r1, PTO+PT_R4; + bri ret_from_trap /* fall through will not work here due to align */ + nop; + +C_ENTRY(sys_rt_sigreturn_wrapper): + swi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + swi r4, r1, PTO+PT_R4; + la r5, r1, PTO; /* add user context as 1st arg */ + brlid r15, sys_rt_sigreturn /* Do real work */ + nop; + lwi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + lwi r4, r1, PTO+PT_R4; + bri ret_from_trap /* fall through will not work here due to align */ + nop; + +/* + * HW EXCEPTION rutine start + */ + +#define SAVE_STATE \ + swi r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* Save r11 */ \ + set_bip; /*equalize initial state for all possible entries*/\ + clear_eip; \ + enable_irq; \ + set_ee; \ + /* See if already in kernel mode.*/ \ + lwi r11, r0, TOPHYS(PER_CPU(KM)); \ + beqi r11, 1f; /* Jump ahead if coming from user */\ + /* Kernel-mode state save. */ \ + /* Reload kernel stack-ptr. */ \ + lwi r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); \ + tophys(r1,r11); \ + swi r11, r1, (PT_R1-PT_SIZE); /* Save original SP. */ \ + lwi r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */\ + addik r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */\ + /* store return registers separately because \ + * this macros is use for others exceptions */ \ + swi r3, r1, PTO + PT_R3; \ + swi r4, r1, PTO + PT_R4; \ + SAVE_REGS \ + /* PC, before IRQ/trap - this is one instruction above */ \ + swi r17, r1, PTO+PT_PC; \ + \ + addi r11, r0, 1; /* Was in kernel-mode. */ \ + swi r11, r1, PTO+PT_MODE; \ + brid 2f; \ + nop; /* Fill delay slot */ \ +1: /* User-mode state save. */ \ + lwi r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */\ + lwi r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */\ + tophys(r1,r1); \ + lwi r1, r1, TS_THREAD_INFO; /* get the thread info */ \ + addik r1, r1, THREAD_SIZE; /* calculate kernel stack pointer */\ + tophys(r1,r1); \ + \ + addik r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */\ + /* store return registers separately because this macros \ + * is use for others exceptions */ \ + swi r3, r1, PTO + PT_R3; \ + swi r4, r1, PTO + PT_R4; \ + SAVE_REGS \ + /* PC, before IRQ/trap - this is one instruction above FIXME*/ \ + swi r17, r1, PTO+PT_PC; \ + \ + swi r0, r1, PTO+PT_MODE; /* Was in user-mode. */ \ + lwi r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); \ + swi r11, r1, PTO+PT_R1; /* Store user SP. */ \ + addi r11, r0, 1; \ + swi r11, r0, TOPHYS(PER_CPU(KM)); /* Now we're in kernel-mode.*/\ +2: lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */\ + /* Save away the syscall number. */ \ + swi r0, r1, PTO+PT_R0; \ + tovirt(r1,r1) + +C_ENTRY(full_exception_trap): + swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */ + /* adjust exception address for privileged instruction + * for finding where is it */ + addik r17, r17, -4 + SAVE_STATE /* Save registers */ + /* FIXME this can be store directly in PT_ESR reg. + * I tested it but there is a fault */ + /* where the trap should return need -8 to adjust for rtsd r15, 8 */ + la r15, r0, ret_from_exc - 8 + la r5, r1, PTO /* parameter struct pt_regs * regs */ + mfs r6, resr + nop + mfs r7, rfsr; /* save FSR */ + nop + la r12, r0, full_exception + set_vms; + rtbd r12, 0; + nop; + +/* + * Unaligned data trap. + * + * Unaligned data trap last on 4k page is handled here. + * + * Trap entered via exception, so EE bit is set, and interrupts + * are masked. This is nice, means we don't have to CLI before state save + * + * The assembler routine is in "arch/microblaze/kernel/hw_exception_handler.S" + */ +C_ENTRY(unaligned_data_trap): + swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */ + SAVE_STATE /* Save registers.*/ + /* where the trap should return need -8 to adjust for rtsd r15, 8 */ + la r15, r0, ret_from_exc-8 + mfs r3, resr /* ESR */ + nop + mfs r4, rear /* EAR */ + nop + la r7, r1, PTO /* parameter struct pt_regs * regs */ + la r12, r0, _unaligned_data_exception + set_vms; + rtbd r12, 0; /* interrupts enabled */ + nop; + +/* + * Page fault traps. + * + * If the real exception handler (from hw_exception_handler.S) didn't find + * the mapping for the process, then we're thrown here to handle such situation. + * + * Trap entered via exceptions, so EE bit is set, and interrupts + * are masked. This is nice, means we don't have to CLI before state save + * + * Build a standard exception frame for TLB Access errors. All TLB exceptions + * will bail out to this point if they can't resolve the lightweight TLB fault. + * + * The C function called is in "arch/microblaze/mm/fault.c", declared as: + * void do_page_fault(struct pt_regs *regs, + * unsigned long address, + * unsigned long error_code) + */ +/* data and intruction trap - which is choose is resolved int fault.c */ +C_ENTRY(page_fault_data_trap): + swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */ + SAVE_STATE /* Save registers.*/ + /* where the trap should return need -8 to adjust for rtsd r15, 8 */ + la r15, r0, ret_from_exc-8 + la r5, r1, PTO /* parameter struct pt_regs * regs */ + mfs r6, rear /* parameter unsigned long address */ + nop + mfs r7, resr /* parameter unsigned long error_code */ + nop + la r12, r0, do_page_fault + set_vms; + rtbd r12, 0; /* interrupts enabled */ + nop; + +C_ENTRY(page_fault_instr_trap): + swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */ + SAVE_STATE /* Save registers.*/ + /* where the trap should return need -8 to adjust for rtsd r15, 8 */ + la r15, r0, ret_from_exc-8 + la r5, r1, PTO /* parameter struct pt_regs * regs */ + mfs r6, rear /* parameter unsigned long address */ + nop + ori r7, r0, 0 /* parameter unsigned long error_code */ + la r12, r0, do_page_fault + set_vms; + rtbd r12, 0; /* interrupts enabled */ + nop; + +/* Entry point used to return from an exception. */ +C_ENTRY(ret_from_exc): + set_bip; /* Ints masked for state restore*/ + lwi r11, r1, PTO+PT_MODE; + bnei r11, 2f; /* See if returning to kernel mode, */ + /* ... if so, skip resched &c. */ + + /* We're returning to user mode, so check for various conditions that + trigger rescheduling. */ + /* Get current task ptr into r11 */ + add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + lwi r11, r11, TS_THREAD_INFO; /* get thread info */ + lwi r11, r11, TI_FLAGS; /* get flags in thread info */ + andi r11, r11, _TIF_NEED_RESCHED; + beqi r11, 5f; + +/* Call the scheduler before returning from a syscall/trap. */ + bralid r15, schedule; /* Call scheduler */ + nop; /* delay slot */ + + /* Maybe handle a signal */ +5: add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + lwi r11, r11, TS_THREAD_INFO; /* get thread info */ + lwi r11, r11, TI_FLAGS; /* get flags in thread info */ + andi r11, r11, _TIF_SIGPENDING; + beqi r11, 1f; /* Signals to handle, handle them */ + + /* + * Handle a signal return; Pending signals should be in r18. + * + * Not all registers are saved by the normal trap/interrupt entry + * points (for instance, call-saved registers (because the normal + * C-compiler calling sequence in the kernel makes sure they're + * preserved), and call-clobbered registers in the case of + * traps), but signal handlers may want to examine or change the + * complete register state. Here we save anything not saved by + * the normal entry sequence, so that it may be safely restored + * (in a possibly modified form) after do_signal returns. + * store return registers separately because this macros is use + * for others exceptions */ + swi r3, r1, PTO + PT_R3; + swi r4, r1, PTO + PT_R4; + la r5, r1, PTO; /* Arg 1: struct pt_regs *regs */ + add r6, r0, r0; /* Arg 2: sigset_t *oldset */ + addi r7, r0, 0; /* Arg 3: int in_syscall */ + bralid r15, do_signal; /* Handle any signals */ + nop; + lwi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + lwi r4, r1, PTO+PT_R4; + +/* Finally, return to user state. */ +1: swi r0, r0, PER_CPU(KM); /* Now officially in user state. */ + add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + swi r11, r0, PER_CPU(CURRENT_SAVE); /* save current */ + VM_OFF; + tophys(r1,r1); + + lwi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + lwi r4, r1, PTO+PT_R4; + RESTORE_REGS; + addik r1, r1, STATE_SAVE_SIZE /* Clean up stack space. */ + + lwi r1, r1, PT_R1 - PT_SIZE; /* Restore user stack pointer. */ + bri 6f; +/* Return to kernel state. */ +2: VM_OFF; + tophys(r1,r1); + lwi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + lwi r4, r1, PTO+PT_R4; + RESTORE_REGS; + addik r1, r1, STATE_SAVE_SIZE /* Clean up stack space. */ + + tovirt(r1,r1); +6: +EXC_return: /* Make global symbol for debugging */ + rtbd r14, 0; /* Instructions to return from an IRQ */ + nop; + +/* + * HW EXCEPTION rutine end + */ + +/* + * Hardware maskable interrupts. + * + * The stack-pointer (r1) should have already been saved to the memory + * location PER_CPU(ENTRY_SP). + */ +C_ENTRY(_interrupt): +/* MS: we are in physical address */ +/* Save registers, switch to proper stack, convert SP to virtual.*/ + swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) + swi r11, r0, TOPHYS(PER_CPU(R11_SAVE)); + /* MS: See if already in kernel mode. */ + lwi r11, r0, TOPHYS(PER_CPU(KM)); + beqi r11, 1f; /* MS: Jump ahead if coming from user */ + +/* Kernel-mode state save. */ + or r11, r1, r0 + tophys(r1,r11); /* MS: I have in r1 physical address where stack is */ +/* MS: Save original SP - position PT_R1 to next stack frame 4 *1 - 152*/ + swi r11, r1, (PT_R1 - PT_SIZE); +/* MS: restore r11 because of saving in SAVE_REGS */ + lwi r11, r0, TOPHYS(PER_CPU(R11_SAVE)); + /* save registers */ +/* MS: Make room on the stack -> activation record */ + addik r1, r1, -STATE_SAVE_SIZE; +/* MS: store return registers separately because + * this macros is use for others exceptions */ + swi r3, r1, PTO + PT_R3; + swi r4, r1, PTO + PT_R4; + SAVE_REGS + /* MS: store mode */ + addi r11, r0, 1; /* MS: Was in kernel-mode. */ + swi r11, r1, PTO + PT_MODE; /* MS: and save it */ + brid 2f; + nop; /* MS: Fill delay slot */ + +1: +/* User-mode state save. */ +/* MS: restore r11 -> FIXME move before SAVE_REG */ + lwi r11, r0, TOPHYS(PER_CPU(R11_SAVE)); + /* MS: get the saved current */ + lwi r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); + tophys(r1,r1); + lwi r1, r1, TS_THREAD_INFO; + addik r1, r1, THREAD_SIZE; + tophys(r1,r1); + /* save registers */ + addik r1, r1, -STATE_SAVE_SIZE; + swi r3, r1, PTO+PT_R3; + swi r4, r1, PTO+PT_R4; + SAVE_REGS + /* calculate mode */ + swi r0, r1, PTO + PT_MODE; + lwi r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); + swi r11, r1, PTO+PT_R1; + /* setup kernel mode to KM */ + addi r11, r0, 1; + swi r11, r0, TOPHYS(PER_CPU(KM)); + +2: + lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); + swi r0, r1, PTO + PT_R0; + tovirt(r1,r1) + la r5, r1, PTO; + set_vms; + la r11, r0, do_IRQ; + la r15, r0, irq_call; +irq_call:rtbd r11, 0; + nop; + +/* MS: we are in virtual mode */ +ret_from_irq: + lwi r11, r1, PTO + PT_MODE; + bnei r11, 2f; + + add r11, r0, CURRENT_TASK; + lwi r11, r11, TS_THREAD_INFO; + lwi r11, r11, TI_FLAGS; /* MS: get flags from thread info */ + andi r11, r11, _TIF_NEED_RESCHED; + beqi r11, 5f + bralid r15, schedule; + nop; /* delay slot */ + + /* Maybe handle a signal */ +5: add r11, r0, CURRENT_TASK; + lwi r11, r11, TS_THREAD_INFO; /* MS: get thread info */ + lwi r11, r11, TI_FLAGS; /* get flags in thread info */ + andi r11, r11, _TIF_SIGPENDING; + beqid r11, no_intr_resched +/* Handle a signal return; Pending signals should be in r18. */ + addi r7, r0, 0; /* Arg 3: int in_syscall */ + la r5, r1, PTO; /* Arg 1: struct pt_regs *regs */ + bralid r15, do_signal; /* Handle any signals */ + add r6, r0, r0; /* Arg 2: sigset_t *oldset */ + +/* Finally, return to user state. */ +no_intr_resched: + /* Disable interrupts, we are now committed to the state restore */ + disable_irq + swi r0, r0, PER_CPU(KM); /* MS: Now officially in user state. */ + add r11, r0, CURRENT_TASK; + swi r11, r0, PER_CPU(CURRENT_SAVE); + VM_OFF; + tophys(r1,r1); + lwi r3, r1, PTO + PT_R3; /* MS: restore saved r3, r4 registers */ + lwi r4, r1, PTO + PT_R4; + RESTORE_REGS + addik r1, r1, STATE_SAVE_SIZE /* MS: Clean up stack space. */ + lwi r1, r1, PT_R1 - PT_SIZE; + bri 6f; +/* MS: Return to kernel state. */ +2: VM_OFF /* MS: turn off MMU */ + tophys(r1,r1) + lwi r3, r1, PTO + PT_R3; /* MS: restore saved r3, r4 registers */ + lwi r4, r1, PTO + PT_R4; + RESTORE_REGS + addik r1, r1, STATE_SAVE_SIZE /* MS: Clean up stack space. */ + tovirt(r1,r1); +6: +IRQ_return: /* MS: Make global symbol for debugging */ + rtid r14, 0 + nop + +/* + * `Debug' trap + * We enter dbtrap in "BIP" (breakpoint) mode. + * So we exit the breakpoint mode with an 'rtbd' and proceed with the + * original dbtrap. + * however, wait to save state first + */ +C_ENTRY(_debug_exception): + /* BIP bit is set on entry, no interrupts can occur */ + swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) + + swi r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* Save r11 */ + set_bip; /*equalize initial state for all possible entries*/ + clear_eip; + enable_irq; + lwi r11, r0, TOPHYS(PER_CPU(KM));/* See if already in kernel mode.*/ + beqi r11, 1f; /* Jump ahead if coming from user */ + /* Kernel-mode state save. */ + lwi r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* Reload kernel stack-ptr*/ + tophys(r1,r11); + swi r11, r1, (PT_R1-PT_SIZE); /* Save original SP. */ + lwi r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */ + + addik r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */ + swi r3, r1, PTO + PT_R3; + swi r4, r1, PTO + PT_R4; + SAVE_REGS; + + addi r11, r0, 1; /* Was in kernel-mode. */ + swi r11, r1, PTO + PT_MODE; + brid 2f; + nop; /* Fill delay slot */ +1: /* User-mode state save. */ + lwi r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */ + lwi r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */ + tophys(r1,r1); + lwi r1, r1, TS_THREAD_INFO; /* get the thread info */ + addik r1, r1, THREAD_SIZE; /* calculate kernel stack pointer */ + tophys(r1,r1); + + addik r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */ + swi r3, r1, PTO + PT_R3; + swi r4, r1, PTO + PT_R4; + SAVE_REGS; + + swi r0, r1, PTO+PT_MODE; /* Was in user-mode. */ + lwi r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); + swi r11, r1, PTO+PT_R1; /* Store user SP. */ + addi r11, r0, 1; + swi r11, r0, TOPHYS(PER_CPU(KM)); /* Now we're in kernel-mode. */ +2: lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */ + /* Save away the syscall number. */ + swi r0, r1, PTO+PT_R0; + tovirt(r1,r1) + + addi r5, r0, SIGTRAP /* send the trap signal */ + add r6, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + addk r7, r0, r0 /* 3rd param zero */ + + set_vms; + la r11, r0, send_sig; + la r15, r0, dbtrap_call; +dbtrap_call: rtbd r11, 0; + nop; + + set_bip; /* Ints masked for state restore*/ + lwi r11, r1, PTO+PT_MODE; + bnei r11, 2f; + + /* Get current task ptr into r11 */ + add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + lwi r11, r11, TS_THREAD_INFO; /* get thread info */ + lwi r11, r11, TI_FLAGS; /* get flags in thread info */ + andi r11, r11, _TIF_NEED_RESCHED; + beqi r11, 5f; + +/* Call the scheduler before returning from a syscall/trap. */ + + bralid r15, schedule; /* Call scheduler */ + nop; /* delay slot */ + /* XXX Is PT_DTRACE handling needed here? */ + /* XXX m68knommu also checks TASK_STATE & TASK_COUNTER here. */ + + /* Maybe handle a signal */ +5: add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + lwi r11, r11, TS_THREAD_INFO; /* get thread info */ + lwi r11, r11, TI_FLAGS; /* get flags in thread info */ + andi r11, r11, _TIF_SIGPENDING; + beqi r11, 1f; /* Signals to handle, handle them */ + +/* Handle a signal return; Pending signals should be in r18. */ + /* Not all registers are saved by the normal trap/interrupt entry + points (for instance, call-saved registers (because the normal + C-compiler calling sequence in the kernel makes sure they're + preserved), and call-clobbered registers in the case of + traps), but signal handlers may want to examine or change the + complete register state. Here we save anything not saved by + the normal entry sequence, so that it may be safely restored + (in a possibly modified form) after do_signal returns. */ + + la r5, r1, PTO; /* Arg 1: struct pt_regs *regs */ + add r6, r0, r0; /* Arg 2: sigset_t *oldset */ + addi r7, r0, 0; /* Arg 3: int in_syscall */ + bralid r15, do_signal; /* Handle any signals */ + nop; + + +/* Finally, return to user state. */ +1: swi r0, r0, PER_CPU(KM); /* Now officially in user state. */ + add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + swi r11, r0, PER_CPU(CURRENT_SAVE); /* save current */ + VM_OFF; + tophys(r1,r1); + + lwi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + lwi r4, r1, PTO+PT_R4; + RESTORE_REGS + addik r1, r1, STATE_SAVE_SIZE /* Clean up stack space. */ + + + lwi r1, r1, PT_R1 - PT_SIZE; + /* Restore user stack pointer. */ + bri 6f; + +/* Return to kernel state. */ +2: VM_OFF; + tophys(r1,r1); + lwi r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */ + lwi r4, r1, PTO+PT_R4; + RESTORE_REGS + addik r1, r1, STATE_SAVE_SIZE /* Clean up stack space. */ + + tovirt(r1,r1); +6: +DBTRAP_return: /* Make global symbol for debugging */ + rtbd r14, 0; /* Instructions to return from an IRQ */ + nop; + + + +ENTRY(_switch_to) + /* prepare return value */ + addk r3, r0, r31 + + /* save registers in cpu_context */ + /* use r11 and r12, volatile registers, as temp register */ + /* give start of cpu_context for previous process */ + addik r11, r5, TI_CPU_CONTEXT + swi r1, r11, CC_R1 + swi r2, r11, CC_R2 + /* skip volatile registers. + * they are saved on stack when we jumped to _switch_to() */ + /* dedicated registers */ + swi r13, r11, CC_R13 + swi r14, r11, CC_R14 + swi r15, r11, CC_R15 + swi r16, r11, CC_R16 + swi r17, r11, CC_R17 + swi r18, r11, CC_R18 + /* save non-volatile registers */ + swi r19, r11, CC_R19 + swi r20, r11, CC_R20 + swi r21, r11, CC_R21 + swi r22, r11, CC_R22 + swi r23, r11, CC_R23 + swi r24, r11, CC_R24 + swi r25, r11, CC_R25 + swi r26, r11, CC_R26 + swi r27, r11, CC_R27 + swi r28, r11, CC_R28 + swi r29, r11, CC_R29 + swi r30, r11, CC_R30 + /* special purpose registers */ + mfs r12, rmsr + nop + swi r12, r11, CC_MSR + mfs r12, rear + nop + swi r12, r11, CC_EAR + mfs r12, resr + nop + swi r12, r11, CC_ESR + mfs r12, rfsr + nop + swi r12, r11, CC_FSR + + /* update r31, the current */ + lwi r31, r6, TI_TASK/* give me pointer to task which will be next */ + /* stored it to current_save too */ + swi r31, r0, PER_CPU(CURRENT_SAVE) + + /* get new process' cpu context and restore */ + /* give me start where start context of next task */ + addik r11, r6, TI_CPU_CONTEXT + + /* non-volatile registers */ + lwi r30, r11, CC_R30 + lwi r29, r11, CC_R29 + lwi r28, r11, CC_R28 + lwi r27, r11, CC_R27 + lwi r26, r11, CC_R26 + lwi r25, r11, CC_R25 + lwi r24, r11, CC_R24 + lwi r23, r11, CC_R23 + lwi r22, r11, CC_R22 + lwi r21, r11, CC_R21 + lwi r20, r11, CC_R20 + lwi r19, r11, CC_R19 + /* dedicated registers */ + lwi r18, r11, CC_R18 + lwi r17, r11, CC_R17 + lwi r16, r11, CC_R16 + lwi r15, r11, CC_R15 + lwi r14, r11, CC_R14 + lwi r13, r11, CC_R13 + /* skip volatile registers */ + lwi r2, r11, CC_R2 + lwi r1, r11, CC_R1 + + /* special purpose registers */ + lwi r12, r11, CC_FSR + mts rfsr, r12 + nop + lwi r12, r11, CC_MSR + mts rmsr, r12 + nop + + rtsd r15, 8 + nop + +ENTRY(_reset) + brai 0x70; /* Jump back to FS-boot */ + +ENTRY(_break) + mfs r5, rmsr + nop + swi r5, r0, 0x250 + TOPHYS(r0_ram) + mfs r5, resr + nop + swi r5, r0, 0x254 + TOPHYS(r0_ram) + bri 0 + + /* These are compiled and loaded into high memory, then + * copied into place in mach_early_setup */ + .section .init.ivt, "ax" + .org 0x0 + /* this is very important - here is the reset vector */ + /* in current MMU branch you don't care what is here - it is + * used from bootloader site - but this is correct for FS-BOOT */ + brai 0x70 + nop + brai TOPHYS(_user_exception); /* syscall handler */ + brai TOPHYS(_interrupt); /* Interrupt handler */ + brai TOPHYS(_break); /* nmi trap handler */ + brai TOPHYS(_hw_exception_handler); /* HW exception handler */ + + .org 0x60 + brai TOPHYS(_debug_exception); /* debug trap handler*/ + +.section .rodata,"a" +#include "syscall_table.S" + +syscall_table_size=(.-sys_call_table) + -- cgit v1.2.2 From 7db29dde731db02143418cfa008b7b77ccb2fa57 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:22 +0200 Subject: microblaze_mmu_v2: Update exception handling - MMU exception Signed-off-by: Michal Simek --- arch/microblaze/kernel/hw_exception_handler.S | 746 +++++++++++++++++++++++++- 1 file changed, 739 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S index cf9486d99838..9d591cd74fc2 100644 --- a/arch/microblaze/kernel/hw_exception_handler.S +++ b/arch/microblaze/kernel/hw_exception_handler.S @@ -53,6 +53,12 @@ * - Illegal instruction opcode * - Divide-by-zero * + * - Privileged instruction exception (MMU) + * - Data storage exception (MMU) + * - Instruction storage exception (MMU) + * - Data TLB miss exception (MMU) + * - Instruction TLB miss exception (MMU) + * * Note we disable interrupts during exception handling, otherwise we will * possibly get multiple re-entrancy if interrupt handles themselves cause * exceptions. JW @@ -71,9 +77,24 @@ #include /* Helpful Macros */ +#ifndef CONFIG_MMU #define EX_HANDLER_STACK_SIZ (4*19) +#endif #define NUM_TO_REG(num) r ## num +#ifdef CONFIG_MMU +/* FIXME you can't change first load of MSR because there is + * hardcoded jump bri 4 */ + #define RESTORE_STATE \ + lwi r3, r1, PT_R3; \ + lwi r4, r1, PT_R4; \ + lwi r5, r1, PT_R5; \ + lwi r6, r1, PT_R6; \ + lwi r11, r1, PT_R11; \ + lwi r31, r1, PT_R31; \ + lwi r1, r0, TOPHYS(r0_ram + 0); +#endif /* CONFIG_MMU */ + #define LWREG_NOP \ bri ex_handler_unhandled; \ nop; @@ -106,6 +127,54 @@ or r3, r0, NUM_TO_REG (regnum); \ bri ex_sw_tail; +#ifdef CONFIG_MMU + #define R3_TO_LWREG_VM_V(regnum) \ + brid ex_lw_end_vm; \ + swi r3, r7, 4 * regnum; + + #define R3_TO_LWREG_VM(regnum) \ + brid ex_lw_end_vm; \ + or NUM_TO_REG (regnum), r0, r3; + + #define SWREG_TO_R3_VM_V(regnum) \ + brid ex_sw_tail_vm; \ + lwi r3, r7, 4 * regnum; + + #define SWREG_TO_R3_VM(regnum) \ + brid ex_sw_tail_vm; \ + or r3, r0, NUM_TO_REG (regnum); + + /* Shift right instruction depending on available configuration */ + #if CONFIG_XILINX_MICROBLAZE0_USE_BARREL > 0 + #define BSRLI(rD, rA, imm) \ + bsrli rD, rA, imm + #elif CONFIG_XILINX_MICROBLAZE0_USE_DIV > 0 + #define BSRLI(rD, rA, imm) \ + ori rD, r0, (1 << imm); \ + idivu rD, rD, rA + #else + #define BSRLI(rD, rA, imm) BSRLI ## imm (rD, rA) + /* Only the used shift constants defined here - add more if needed */ + #define BSRLI2(rD, rA) \ + srl rD, rA; /* << 1 */ \ + srl rD, rD; /* << 2 */ + #define BSRLI10(rD, rA) \ + srl rD, rA; /* << 1 */ \ + srl rD, rD; /* << 2 */ \ + srl rD, rD; /* << 3 */ \ + srl rD, rD; /* << 4 */ \ + srl rD, rD; /* << 5 */ \ + srl rD, rD; /* << 6 */ \ + srl rD, rD; /* << 7 */ \ + srl rD, rD; /* << 8 */ \ + srl rD, rD; /* << 9 */ \ + srl rD, rD /* << 10 */ + #define BSRLI20(rD, rA) \ + BSRLI10(rD, rA); \ + BSRLI10(rD, rD) + #endif +#endif /* CONFIG_MMU */ + .extern other_exception_handler /* Defined in exception.c */ /* @@ -163,34 +232,119 @@ /* wrappers to restore state before coming to entry.S */ +#ifdef CONFIG_MMU +.section .rodata +.align 4 +_MB_HW_ExceptionVectorTable: +/* 0 - Undefined */ + .long TOPHYS(ex_handler_unhandled) +/* 1 - Unaligned data access exception */ + .long TOPHYS(handle_unaligned_ex) +/* 2 - Illegal op-code exception */ + .long TOPHYS(full_exception_trapw) +/* 3 - Instruction bus error exception */ + .long TOPHYS(full_exception_trapw) +/* 4 - Data bus error exception */ + .long TOPHYS(full_exception_trapw) +/* 5 - Divide by zero exception */ + .long TOPHYS(full_exception_trapw) +/* 6 - Floating point unit exception */ + .long TOPHYS(full_exception_trapw) +/* 7 - Privileged instruction exception */ + .long TOPHYS(full_exception_trapw) +/* 8 - 15 - Undefined */ + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) +/* 16 - Data storage exception */ + .long TOPHYS(handle_data_storage_exception) +/* 17 - Instruction storage exception */ + .long TOPHYS(handle_instruction_storage_exception) +/* 18 - Data TLB miss exception */ + .long TOPHYS(handle_data_tlb_miss_exception) +/* 19 - Instruction TLB miss exception */ + .long TOPHYS(handle_instruction_tlb_miss_exception) +/* 20 - 31 - Undefined */ + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) + .long TOPHYS(ex_handler_unhandled) +#endif + .global _hw_exception_handler .section .text .align 4 .ent _hw_exception_handler _hw_exception_handler: +#ifndef CONFIG_MMU addik r1, r1, -(EX_HANDLER_STACK_SIZ); /* Create stack frame */ +#else + swi r1, r0, TOPHYS(r0_ram + 0); /* GET_SP */ + /* Save date to kernel memory. Here is the problem + * when you came from user space */ + ori r1, r0, TOPHYS(r0_ram + 28); +#endif swi r3, r1, PT_R3 swi r4, r1, PT_R4 swi r5, r1, PT_R5 swi r6, r1, PT_R6 - mfs r5, rmsr; - nop - swi r5, r1, 0; - mfs r4, rbtr /* Save BTR before jumping to handler */ - nop +#ifdef CONFIG_MMU + swi r11, r1, PT_R11 + swi r31, r1, PT_R31 + lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */ +#endif + mfs r3, resr nop + mfs r4, rear; + nop +#ifndef CONFIG_MMU andi r5, r3, 0x1000; /* Check ESR[DS] */ beqi r5, not_in_delay_slot; /* Branch if ESR[DS] not set */ mfs r17, rbtr; /* ESR[DS] set - return address in BTR */ nop not_in_delay_slot: swi r17, r1, PT_R17 +#endif andi r5, r3, 0x1F; /* Extract ESR[EXC] */ +#ifdef CONFIG_MMU + /* Calculate exception vector offset = r5 << 2 */ + addk r6, r5, r5; /* << 1 */ + addk r6, r6, r6; /* << 2 */ + +/* counting which exception happen */ + lwi r5, r0, 0x200 + TOPHYS(r0_ram) + addi r5, r5, 1 + swi r5, r0, 0x200 + TOPHYS(r0_ram) + lwi r5, r6, 0x200 + TOPHYS(r0_ram) + addi r5, r5, 1 + swi r5, r6, 0x200 + TOPHYS(r0_ram) +/* end */ + /* Load the HW Exception vector */ + lwi r6, r6, TOPHYS(_MB_HW_ExceptionVectorTable) + bra r6 + +full_exception_trapw: + RESTORE_STATE + bri full_exception_trap +#else /* Exceptions enabled here. This will allow nested exceptions */ mfs r6, rmsr; nop @@ -254,6 +408,7 @@ handle_other_ex: /* Handle Other exceptions here */ lwi r18, r1, PT_R18 bri ex_handler_done; /* Complete exception handling */ +#endif /* 0x01 - Unaligned data access exception * This occurs when a word access is not aligned on a word boundary, @@ -265,11 +420,28 @@ handle_other_ex: /* Handle Other exceptions here */ handle_unaligned_ex: /* Working registers already saved: R3, R4, R5, R6 * R3 = ESR - * R4 = BTR + * R4 = EAR */ - mfs r4, rear; +#ifdef CONFIG_MMU + andi r6, r3, 0x1000 /* Check ESR[DS] */ + beqi r6, _no_delayslot /* Branch if ESR[DS] not set */ + mfs r17, rbtr; /* ESR[DS] set - return address in BTR */ nop +_no_delayslot: +#endif + +#ifdef CONFIG_MMU + /* Check if unaligned address is last on a 4k page */ + andi r5, r4, 0xffc + xori r5, r5, 0xffc + bnei r5, _unaligned_ex2 + _unaligned_ex1: + RESTORE_STATE; +/* Another page must be accessed or physical address not in page table */ + bri unaligned_data_trap + _unaligned_ex2: +#endif andi r6, r3, 0x3E0; /* Mask and extract the register operand */ srl r6, r6; /* r6 >> 5 */ srl r6, r6; @@ -278,6 +450,45 @@ handle_unaligned_ex: srl r6, r6; /* Store the register operand in a temporary location */ sbi r6, r0, TOPHYS(ex_reg_op); +#ifdef CONFIG_MMU + /* Get physical address */ + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + ori r5, r0, CONFIG_KERNEL_START + cmpu r5, r4, r5 + bgti r5, _unaligned_ex3 + ori r5, r0, swapper_pg_dir + bri _unaligned_ex4 + + /* Get the PGD for the current thread. */ +_unaligned_ex3: /* user thread */ + addi r5 ,CURRENT_TASK, TOPHYS(0); /* get current task address */ + lwi r5, r5, TASK_THREAD + PGDIR +_unaligned_ex4: + tophys(r5,r5) + BSRLI(r6,r4,20) /* Create L1 (pgdir/pmd) address */ + andi r6, r6, 0xffc +/* Assume pgdir aligned on 4K boundary, no need for "andi r5,r5,0xfffff003" */ + or r5, r5, r6 + lwi r6, r5, 0 /* Get L1 entry */ + andi r5, r6, 0xfffff000 /* Extract L2 (pte) base address. */ + beqi r5, _unaligned_ex1 /* Bail if no table */ + + tophys(r5,r5) + BSRLI(r6,r4,10) /* Compute PTE address */ + andi r6, r6, 0xffc + andi r5, r5, 0xfffff003 + or r5, r5, r6 + lwi r5, r5, 0 /* Get Linux PTE */ + + andi r6, r5, _PAGE_PRESENT + beqi r6, _unaligned_ex1 /* Bail if no page */ + + andi r5, r5, 0xfffff000 /* Extract RPN */ + andi r4, r4, 0x00000fff /* Extract offset */ + or r4, r4, r5 /* Create physical address */ +#endif /* CONFIG_MMU */ andi r6, r3, 0x400; /* Extract ESR[S] */ bnei r6, ex_sw; @@ -355,6 +566,7 @@ ex_shw: ex_sw_end: /* Exception handling of store word, ends. */ ex_handler_done: +#ifndef CONFIG_MMU lwi r5, r1, 0 /* RMSR */ mts rmsr, r5 nop @@ -366,13 +578,455 @@ ex_handler_done: rted r17, 0 addik r1, r1, (EX_HANDLER_STACK_SIZ); /* Restore stack frame */ +#else + RESTORE_STATE; + rted r17, 0 + nop +#endif + +#ifdef CONFIG_MMU + /* Exception vector entry code. This code runs with address translation + * turned off (i.e. using physical addresses). */ + + /* Exception vectors. */ + + /* 0x10 - Data Storage Exception + * This happens for just a few reasons. U0 set (but we don't do that), + * or zone protection fault (user violation, write to protected page). + * If this is just an update of modified status, we do that quickly + * and exit. Otherwise, we call heavyweight functions to do the work. + */ + handle_data_storage_exception: + /* Working registers already saved: R3, R4, R5, R6 + * R3 = ESR + */ + mfs r11, rpid + nop + bri 4 + mfs r3, rear /* Get faulting address */ + nop + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + ori r4, r0, CONFIG_KERNEL_START + cmpu r4, r3, r4 + bgti r4, ex3 + /* First, check if it was a zone fault (which means a user + * tried to access a kernel or read-protected page - always + * a SEGV). All other faults here must be stores, so no + * need to check ESR_S as well. */ + mfs r4, resr + nop + andi r4, r4, 0x800 /* ESR_Z - zone protection */ + bnei r4, ex2 + + ori r4, r0, swapper_pg_dir + mts rpid, r0 /* TLB will have 0 TID */ + nop + bri ex4 + + /* Get the PGD for the current thread. */ + ex3: + /* First, check if it was a zone fault (which means a user + * tried to access a kernel or read-protected page - always + * a SEGV). All other faults here must be stores, so no + * need to check ESR_S as well. */ + mfs r4, resr + nop + andi r4, r4, 0x800 /* ESR_Z */ + bnei r4, ex2 + /* get current task address */ + addi r4 ,CURRENT_TASK, TOPHYS(0); + lwi r4, r4, TASK_THREAD+PGDIR + ex4: + tophys(r4,r4) + BSRLI(r5,r3,20) /* Create L1 (pgdir/pmd) address */ + andi r5, r5, 0xffc +/* Assume pgdir aligned on 4K boundary, no need for "andi r4,r4,0xfffff003" */ + or r4, r4, r5 + lwi r4, r4, 0 /* Get L1 entry */ + andi r5, r4, 0xfffff000 /* Extract L2 (pte) base address */ + beqi r5, ex2 /* Bail if no table */ + + tophys(r5,r5) + BSRLI(r6,r3,10) /* Compute PTE address */ + andi r6, r6, 0xffc + andi r5, r5, 0xfffff003 + or r5, r5, r6 + lwi r4, r5, 0 /* Get Linux PTE */ + + andi r6, r4, _PAGE_RW /* Is it writeable? */ + beqi r6, ex2 /* Bail if not */ + + /* Update 'changed' */ + ori r4, r4, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE + swi r4, r5, 0 /* Update Linux page table */ + + /* Most of the Linux PTE is ready to load into the TLB LO. + * We set ZSEL, where only the LS-bit determines user access. + * We set execute, because we don't have the granularity to + * properly set this at the page level (Linux problem). + * If shared is set, we cause a zero PID->TID load. + * Many of these bits are software only. Bits we don't set + * here we (properly should) assume have the appropriate value. + */ + andni r4, r4, 0x0ce2 /* Make sure 20, 21 are zero */ + ori r4, r4, _PAGE_HWEXEC /* make it executable */ + + /* find the TLB index that caused the fault. It has to be here*/ + mts rtlbsx, r3 + nop + mfs r5, rtlbx /* DEBUG: TBD */ + nop + mts rtlblo, r4 /* Load TLB LO */ + nop + /* Will sync shadow TLBs */ + + /* Done...restore registers and get out of here. */ + mts rpid, r11 + nop + bri 4 + + RESTORE_STATE; + rted r17, 0 + nop + ex2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. */ + mts rpid, r11 + nop + bri 4 + RESTORE_STATE; + bri page_fault_data_trap + + + /* 0x11 - Instruction Storage Exception + * This is caused by a fetch from non-execute or guarded pages. */ + handle_instruction_storage_exception: + /* Working registers already saved: R3, R4, R5, R6 + * R3 = ESR + */ + + mfs r3, rear /* Get faulting address */ + nop + RESTORE_STATE; + bri page_fault_instr_trap + + /* 0x12 - Data TLB Miss Exception + * As the name implies, translation is not in the MMU, so search the + * page tables and fix it. The only purpose of this function is to + * load TLB entries from the page table if they exist. + */ + handle_data_tlb_miss_exception: + /* Working registers already saved: R3, R4, R5, R6 + * R3 = ESR + */ + mfs r11, rpid + nop + bri 4 + mfs r3, rear /* Get faulting address */ + nop + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. */ + ori r4, r0, CONFIG_KERNEL_START + cmpu r4, r3, r4 + bgti r4, ex5 + ori r4, r0, swapper_pg_dir + mts rpid, r0 /* TLB will have 0 TID */ + nop + bri ex6 + /* Get the PGD for the current thread. */ + ex5: + /* get current task address */ + addi r4 ,CURRENT_TASK, TOPHYS(0); + lwi r4, r4, TASK_THREAD+PGDIR + ex6: + tophys(r4,r4) + BSRLI(r5,r3,20) /* Create L1 (pgdir/pmd) address */ + andi r5, r5, 0xffc +/* Assume pgdir aligned on 4K boundary, no need for "andi r4,r4,0xfffff003" */ + or r4, r4, r5 + lwi r4, r4, 0 /* Get L1 entry */ + andi r5, r4, 0xfffff000 /* Extract L2 (pte) base address */ + beqi r5, ex7 /* Bail if no table */ + + tophys(r5,r5) + BSRLI(r6,r3,10) /* Compute PTE address */ + andi r6, r6, 0xffc + andi r5, r5, 0xfffff003 + or r5, r5, r6 + lwi r4, r5, 0 /* Get Linux PTE */ + + andi r6, r4, _PAGE_PRESENT + beqi r6, ex7 + + ori r4, r4, _PAGE_ACCESSED + swi r4, r5, 0 + + /* Most of the Linux PTE is ready to load into the TLB LO. + * We set ZSEL, where only the LS-bit determines user access. + * We set execute, because we don't have the granularity to + * properly set this at the page level (Linux problem). + * If shared is set, we cause a zero PID->TID load. + * Many of these bits are software only. Bits we don't set + * here we (properly should) assume have the appropriate value. + */ + andni r4, r4, 0x0ce2 /* Make sure 20, 21 are zero */ + + bri finish_tlb_load + ex7: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mts rpid, r11 + nop + bri 4 + RESTORE_STATE; + bri page_fault_data_trap + + /* 0x13 - Instruction TLB Miss Exception + * Nearly the same as above, except we get our information from + * different registers and bailout to a different point. + */ + handle_instruction_tlb_miss_exception: + /* Working registers already saved: R3, R4, R5, R6 + * R3 = ESR + */ + mfs r11, rpid + nop + bri 4 + mfs r3, rear /* Get faulting address */ + nop + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + ori r4, r0, CONFIG_KERNEL_START + cmpu r4, r3, r4 + bgti r4, ex8 + ori r4, r0, swapper_pg_dir + mts rpid, r0 /* TLB will have 0 TID */ + nop + bri ex9 + + /* Get the PGD for the current thread. */ + ex8: + /* get current task address */ + addi r4 ,CURRENT_TASK, TOPHYS(0); + lwi r4, r4, TASK_THREAD+PGDIR + ex9: + tophys(r4,r4) + BSRLI(r5,r3,20) /* Create L1 (pgdir/pmd) address */ + andi r5, r5, 0xffc +/* Assume pgdir aligned on 4K boundary, no need for "andi r4,r4,0xfffff003" */ + or r4, r4, r5 + lwi r4, r4, 0 /* Get L1 entry */ + andi r5, r4, 0xfffff000 /* Extract L2 (pte) base address */ + beqi r5, ex10 /* Bail if no table */ + + tophys(r5,r5) + BSRLI(r6,r3,10) /* Compute PTE address */ + andi r6, r6, 0xffc + andi r5, r5, 0xfffff003 + or r5, r5, r6 + lwi r4, r5, 0 /* Get Linux PTE */ + + andi r6, r4, _PAGE_PRESENT + beqi r6, ex7 + + ori r4, r4, _PAGE_ACCESSED + swi r4, r5, 0 + + /* Most of the Linux PTE is ready to load into the TLB LO. + * We set ZSEL, where only the LS-bit determines user access. + * We set execute, because we don't have the granularity to + * properly set this at the page level (Linux problem). + * If shared is set, we cause a zero PID->TID load. + * Many of these bits are software only. Bits we don't set + * here we (properly should) assume have the appropriate value. + */ + andni r4, r4, 0x0ce2 /* Make sure 20, 21 are zero */ + + bri finish_tlb_load + ex10: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mts rpid, r11 + nop + bri 4 + RESTORE_STATE; + bri page_fault_instr_trap + +/* Both the instruction and data TLB miss get to this point to load the TLB. + * r3 - EA of fault + * r4 - TLB LO (info from Linux PTE) + * r5, r6 - available to use + * PID - loaded with proper value when we get here + * Upon exit, we reload everything and RFI. + * A common place to load the TLB. + */ + tlb_index: + .long 1 /* MS: storing last used tlb index */ + finish_tlb_load: + /* MS: load the last used TLB index. */ + lwi r5, r0, TOPHYS(tlb_index) + addik r5, r5, 1 /* MS: inc tlb_index -> use next one */ + +/* MS: FIXME this is potential fault, because this is mask not count */ + andi r5, r5, (MICROBLAZE_TLB_SIZE-1) + ori r6, r0, 1 + cmp r31, r5, r6 + blti r31, sem + addik r5, r6, 1 + sem: + /* MS: save back current TLB index */ + swi r5, r0, TOPHYS(tlb_index) + + ori r4, r4, _PAGE_HWEXEC /* make it executable */ + mts rtlbx, r5 /* MS: save current TLB */ + nop + mts rtlblo, r4 /* MS: save to TLB LO */ + nop + + /* Create EPN. This is the faulting address plus a static + * set of bits. These are size, valid, E, U0, and ensure + * bits 20 and 21 are zero. + */ + andi r3, r3, 0xfffff000 + ori r3, r3, 0x0c0 + mts rtlbhi, r3 /* Load TLB HI */ + nop + + /* Done...restore registers and get out of here. */ + ex12: + mts rpid, r11 + nop + bri 4 + RESTORE_STATE; + rted r17, 0 + nop + + /* extern void giveup_fpu(struct task_struct *prev) + * + * The MicroBlaze processor may have an FPU, so this should not just + * return: TBD. + */ + .globl giveup_fpu; + .align 4; + giveup_fpu: + bralid r15,0 /* TBD */ + nop + + /* At present, this routine just hangs. - extern void abort(void) */ + .globl abort; + .align 4; + abort: + br r0 + + .globl set_context; + .align 4; + set_context: + mts rpid, r5 /* Shadow TLBs are automatically */ + nop + bri 4 /* flushed by changing PID */ + rtsd r15,8 + nop + +#endif .end _hw_exception_handler +#ifdef CONFIG_MMU +/* Unaligned data access exception last on a 4k page for MMU. + * When this is called, we are in virtual mode with exceptions enabled + * and registers 1-13,15,17,18 saved. + * + * R3 = ESR + * R4 = EAR + * R7 = pointer to saved registers (struct pt_regs *regs) + * + * This handler perform the access, and returns via ret_from_exc. + */ +.global _unaligned_data_exception +.ent _unaligned_data_exception +_unaligned_data_exception: + andi r8, r3, 0x3E0; /* Mask and extract the register operand */ + BSRLI(r8,r8,2); /* r8 >> 2 = register operand * 8 */ + andi r6, r3, 0x400; /* Extract ESR[S] */ + bneid r6, ex_sw_vm; + andi r6, r3, 0x800; /* Extract ESR[W] - delay slot */ +ex_lw_vm: + beqid r6, ex_lhw_vm; + lbui r5, r4, 0; /* Exception address in r4 - delay slot */ +/* Load a word, byte-by-byte from destination address and save it in tmp space*/ + la r6, r0, ex_tmp_data_loc_0; + sbi r5, r6, 0; + lbui r5, r4, 1; + sbi r5, r6, 1; + lbui r5, r4, 2; + sbi r5, r6, 2; + lbui r5, r4, 3; + sbi r5, r6, 3; + brid ex_lw_tail_vm; +/* Get the destination register value into r3 - delay slot */ + lwi r3, r6, 0; +ex_lhw_vm: + /* Load a half-word, byte-by-byte from destination address and + * save it in tmp space */ + la r6, r0, ex_tmp_data_loc_0; + sbi r5, r6, 0; + lbui r5, r4, 1; + sbi r5, r6, 1; + lhui r3, r6, 0; /* Get the destination register value into r3 */ +ex_lw_tail_vm: + /* Form load_word jump table offset (lw_table_vm + (8 * regnum)) */ + addik r5, r8, lw_table_vm; + bra r5; +ex_lw_end_vm: /* Exception handling of load word, ends */ + brai ret_from_exc; +ex_sw_vm: +/* Form store_word jump table offset (sw_table_vm + (8 * regnum)) */ + addik r5, r8, sw_table_vm; + bra r5; +ex_sw_tail_vm: + la r5, r0, ex_tmp_data_loc_0; + beqid r6, ex_shw_vm; + swi r3, r5, 0; /* Get the word - delay slot */ + /* Store the word, byte-by-byte into destination address */ + lbui r3, r5, 0; + sbi r3, r4, 0; + lbui r3, r5, 1; + sbi r3, r4, 1; + lbui r3, r5, 2; + sbi r3, r4, 2; + lbui r3, r5, 3; + brid ret_from_exc; + sbi r3, r4, 3; /* Delay slot */ +ex_shw_vm: + /* Store the lower half-word, byte-by-byte into destination address */ + lbui r3, r5, 2; + sbi r3, r4, 0; + lbui r3, r5, 3; + brid ret_from_exc; + sbi r3, r4, 1; /* Delay slot */ +ex_sw_end_vm: /* Exception handling of store word, ends. */ +.end _unaligned_data_exception +#endif /* CONFIG_MMU */ + ex_handler_unhandled: /* FIXME add handle function for unhandled exception - dump register */ bri 0 +/* + * hw_exception_handler Jump Table + * - Contains code snippets for each register that caused the unalign exception + * - Hence exception handler is NOT self-modifying + * - Separate table for load exceptions and store exceptions. + * - Each table is of size: (8 * 32) = 256 bytes + */ + .section .text .align 4 lw_table: @@ -407,7 +1061,11 @@ lw_r27: R3_TO_LWREG (27); lw_r28: R3_TO_LWREG (28); lw_r29: R3_TO_LWREG (29); lw_r30: R3_TO_LWREG (30); +#ifdef CONFIG_MMU +lw_r31: R3_TO_LWREG_V (31); +#else lw_r31: R3_TO_LWREG (31); +#endif sw_table: sw_r0: SWREG_TO_R3 (0); @@ -441,7 +1099,81 @@ sw_r27: SWREG_TO_R3 (27); sw_r28: SWREG_TO_R3 (28); sw_r29: SWREG_TO_R3 (29); sw_r30: SWREG_TO_R3 (30); +#ifdef CONFIG_MMU +sw_r31: SWREG_TO_R3_V (31); +#else sw_r31: SWREG_TO_R3 (31); +#endif + +#ifdef CONFIG_MMU +lw_table_vm: +lw_r0_vm: R3_TO_LWREG_VM (0); +lw_r1_vm: R3_TO_LWREG_VM_V (1); +lw_r2_vm: R3_TO_LWREG_VM_V (2); +lw_r3_vm: R3_TO_LWREG_VM_V (3); +lw_r4_vm: R3_TO_LWREG_VM_V (4); +lw_r5_vm: R3_TO_LWREG_VM_V (5); +lw_r6_vm: R3_TO_LWREG_VM_V (6); +lw_r7_vm: R3_TO_LWREG_VM_V (7); +lw_r8_vm: R3_TO_LWREG_VM_V (8); +lw_r9_vm: R3_TO_LWREG_VM_V (9); +lw_r10_vm: R3_TO_LWREG_VM_V (10); +lw_r11_vm: R3_TO_LWREG_VM_V (11); +lw_r12_vm: R3_TO_LWREG_VM_V (12); +lw_r13_vm: R3_TO_LWREG_VM_V (13); +lw_r14_vm: R3_TO_LWREG_VM (14); +lw_r15_vm: R3_TO_LWREG_VM_V (15); +lw_r16_vm: R3_TO_LWREG_VM (16); +lw_r17_vm: R3_TO_LWREG_VM_V (17); +lw_r18_vm: R3_TO_LWREG_VM_V (18); +lw_r19_vm: R3_TO_LWREG_VM (19); +lw_r20_vm: R3_TO_LWREG_VM (20); +lw_r21_vm: R3_TO_LWREG_VM (21); +lw_r22_vm: R3_TO_LWREG_VM (22); +lw_r23_vm: R3_TO_LWREG_VM (23); +lw_r24_vm: R3_TO_LWREG_VM (24); +lw_r25_vm: R3_TO_LWREG_VM (25); +lw_r26_vm: R3_TO_LWREG_VM (26); +lw_r27_vm: R3_TO_LWREG_VM (27); +lw_r28_vm: R3_TO_LWREG_VM (28); +lw_r29_vm: R3_TO_LWREG_VM (29); +lw_r30_vm: R3_TO_LWREG_VM (30); +lw_r31_vm: R3_TO_LWREG_VM_V (31); + +sw_table_vm: +sw_r0_vm: SWREG_TO_R3_VM (0); +sw_r1_vm: SWREG_TO_R3_VM_V (1); +sw_r2_vm: SWREG_TO_R3_VM_V (2); +sw_r3_vm: SWREG_TO_R3_VM_V (3); +sw_r4_vm: SWREG_TO_R3_VM_V (4); +sw_r5_vm: SWREG_TO_R3_VM_V (5); +sw_r6_vm: SWREG_TO_R3_VM_V (6); +sw_r7_vm: SWREG_TO_R3_VM_V (7); +sw_r8_vm: SWREG_TO_R3_VM_V (8); +sw_r9_vm: SWREG_TO_R3_VM_V (9); +sw_r10_vm: SWREG_TO_R3_VM_V (10); +sw_r11_vm: SWREG_TO_R3_VM_V (11); +sw_r12_vm: SWREG_TO_R3_VM_V (12); +sw_r13_vm: SWREG_TO_R3_VM_V (13); +sw_r14_vm: SWREG_TO_R3_VM (14); +sw_r15_vm: SWREG_TO_R3_VM_V (15); +sw_r16_vm: SWREG_TO_R3_VM (16); +sw_r17_vm: SWREG_TO_R3_VM_V (17); +sw_r18_vm: SWREG_TO_R3_VM_V (18); +sw_r19_vm: SWREG_TO_R3_VM (19); +sw_r20_vm: SWREG_TO_R3_VM (20); +sw_r21_vm: SWREG_TO_R3_VM (21); +sw_r22_vm: SWREG_TO_R3_VM (22); +sw_r23_vm: SWREG_TO_R3_VM (23); +sw_r24_vm: SWREG_TO_R3_VM (24); +sw_r25_vm: SWREG_TO_R3_VM (25); +sw_r26_vm: SWREG_TO_R3_VM (26); +sw_r27_vm: SWREG_TO_R3_VM (27); +sw_r28_vm: SWREG_TO_R3_VM (28); +sw_r29_vm: SWREG_TO_R3_VM (29); +sw_r30_vm: SWREG_TO_R3_VM (30); +sw_r31_vm: SWREG_TO_R3_VM_V (31); +#endif /* CONFIG_MMU */ /* Temporary data structures used in the handler */ .section .data -- cgit v1.2.2 From 0d6de9532663a4120ce35f507f16b72df382e360 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:23 +0200 Subject: microblaze_mmu_v2: uaccess MMU update Signed-off-by: Michal Simek --- arch/microblaze/include/asm/uaccess.h | 298 ++++++++++++++++++++++++++++------ arch/microblaze/lib/uaccess_old.S | 135 +++++++++++++++ 2 files changed, 381 insertions(+), 52 deletions(-) create mode 100644 arch/microblaze/lib/uaccess_old.S (limited to 'arch') diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index a4e171d49d15..65adad61e7e9 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -1,4 +1,6 @@ /* + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -26,6 +28,10 @@ #define VERIFY_READ 0 #define VERIFY_WRITE 1 +#define __clear_user(addr, n) (memset((void *)(addr), 0, (n)), 0) + +#ifndef CONFIG_MMU + extern int ___range_ok(unsigned long addr, unsigned long size); #define __range_ok(addr, size) \ @@ -38,63 +44,64 @@ extern int ___range_ok(unsigned long addr, unsigned long size); extern int bad_user_access_length(void); /* FIXME this is function for optimalization -> memcpy */ -#define __get_user(var, ptr) \ - ({ \ - int __gu_err = 0; \ - switch (sizeof(*(ptr))) { \ - case 1: \ - case 2: \ - case 4: \ - (var) = *(ptr); \ - break; \ - case 8: \ - memcpy((void *) &(var), (ptr), 8); \ - break; \ - default: \ - (var) = 0; \ - __gu_err = __get_user_bad(); \ - break; \ - } \ - __gu_err; \ - }) +#define __get_user(var, ptr) \ +({ \ + int __gu_err = 0; \ + switch (sizeof(*(ptr))) { \ + case 1: \ + case 2: \ + case 4: \ + (var) = *(ptr); \ + break; \ + case 8: \ + memcpy((void *) &(var), (ptr), 8); \ + break; \ + default: \ + (var) = 0; \ + __gu_err = __get_user_bad(); \ + break; \ + } \ + __gu_err; \ +}) #define __get_user_bad() (bad_user_access_length(), (-EFAULT)) +/* FIXME is not there defined __pu_val */ #define __put_user(var, ptr) \ - ({ \ - int __pu_err = 0; \ - switch (sizeof(*(ptr))) { \ - case 1: \ - case 2: \ - case 4: \ - *(ptr) = (var); \ - break; \ - case 8: { \ - typeof(*(ptr)) __pu_val = var; \ - memcpy(ptr, &__pu_val, sizeof(__pu_val));\ - } \ - break; \ - default: \ - __pu_err = __put_user_bad(); \ - break; \ - } \ - __pu_err; \ - }) +({ \ + int __pu_err = 0; \ + switch (sizeof(*(ptr))) { \ + case 1: \ + case 2: \ + case 4: \ + *(ptr) = (var); \ + break; \ + case 8: { \ + typeof(*(ptr)) __pu_val = (var); \ + memcpy(ptr, &__pu_val, sizeof(__pu_val)); \ + } \ + break; \ + default: \ + __pu_err = __put_user_bad(); \ + break; \ + } \ + __pu_err; \ +}) #define __put_user_bad() (bad_user_access_length(), (-EFAULT)) -#define put_user(x, ptr) __put_user(x, ptr) -#define get_user(x, ptr) __get_user(x, ptr) - -#define copy_to_user(to, from, n) (memcpy(to, from, n), 0) -#define copy_from_user(to, from, n) (memcpy(to, from, n), 0) +#define put_user(x, ptr) __put_user((x), (ptr)) +#define get_user(x, ptr) __get_user((x), (ptr)) -#define __copy_to_user(to, from, n) (copy_to_user(to, from, n)) -#define __copy_from_user(to, from, n) (copy_from_user(to, from, n)) -#define __copy_to_user_inatomic(to, from, n) (__copy_to_user(to, from, n)) -#define __copy_from_user_inatomic(to, from, n) (__copy_from_user(to, from, n)) +#define copy_to_user(to, from, n) (memcpy((to), (from), (n)), 0) +#define copy_from_user(to, from, n) (memcpy((to), (from), (n)), 0) -#define __clear_user(addr, n) (memset((void *)addr, 0, n), 0) +#define __copy_to_user(to, from, n) (copy_to_user((to), (from), (n))) +#define __copy_from_user(to, from, n) (copy_from_user((to), (from), (n))) +#define __copy_to_user_inatomic(to, from, n) \ + (__copy_to_user((to), (from), (n))) +#define __copy_from_user_inatomic(to, from, n) \ + (__copy_from_user((to), (from), (n))) static inline unsigned long clear_user(void *addr, unsigned long size) { @@ -103,13 +110,200 @@ static inline unsigned long clear_user(void *addr, unsigned long size) return size; } -/* Returns 0 if exception not found and fixup otherwise. */ +/* Returns 0 if exception not found and fixup otherwise. */ extern unsigned long search_exception_table(unsigned long); +extern long strncpy_from_user(char *dst, const char *src, long count); +extern long strnlen_user(const char *src, long count); + +#else /* CONFIG_MMU */ + +/* + * Address is valid if: + * - "addr", "addr + size" and "size" are all below the limit + */ +#define access_ok(type, addr, size) \ + (get_fs().seg > (((unsigned long)(addr)) | \ + (size) | ((unsigned long)(addr) + (size)))) + +/* || printk("access_ok failed for %s at 0x%08lx (size %d), seg 0x%08x\n", + type?"WRITE":"READ",addr,size,get_fs().seg)) */ + +/* + * All the __XXX versions macros/functions below do not perform + * access checking. It is assumed that the necessary checks have been + * already performed before the finction (macro) is called. + */ + +#define get_user(x, ptr) \ +({ \ + access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) \ + ? __get_user((x), (ptr)) : -EFAULT; \ +}) + +#define put_user(x, ptr) \ +({ \ + access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) \ + ? __put_user((x), (ptr)) : -EFAULT; \ +}) + +#define __get_user(x, ptr) \ +({ \ + unsigned long __gu_val; \ + /*unsigned long __gu_ptr = (unsigned long)(ptr);*/ \ + long __gu_err; \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __get_user_asm("lbu", (ptr), __gu_val, __gu_err); \ + break; \ + case 2: \ + __get_user_asm("lhu", (ptr), __gu_val, __gu_err); \ + break; \ + case 4: \ + __get_user_asm("lw", (ptr), __gu_val, __gu_err); \ + break; \ + default: \ + __gu_val = 0; __gu_err = -EINVAL; \ + } \ + x = (__typeof__(*(ptr))) __gu_val; \ + __gu_err; \ +}) + +#define __get_user_asm(insn, __gu_ptr, __gu_val, __gu_err) \ +({ \ + __asm__ __volatile__ ( \ + "1:" insn " %1, %2, r0; \ + addk %0, r0, r0; \ + 2: \ + .section .fixup,\"ax\"; \ + 3: brid 2b; \ + addik %0, r0, %3; \ + .previous; \ + .section __ex_table,\"a\"; \ + .word 1b,3b; \ + .previous;" \ + : "=r"(__gu_err), "=r"(__gu_val) \ + : "r"(__gu_ptr), "i"(-EFAULT) \ + ); \ +}) + +#define __put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __gu_val = x; \ + long __gu_err = 0; \ + switch (sizeof(__gu_val)) { \ + case 1: \ + __put_user_asm("sb", (ptr), __gu_val, __gu_err); \ + break; \ + case 2: \ + __put_user_asm("sh", (ptr), __gu_val, __gu_err); \ + break; \ + case 4: \ + __put_user_asm("sw", (ptr), __gu_val, __gu_err); \ + break; \ + case 8: \ + __put_user_asm_8((ptr), __gu_val, __gu_err); \ + break; \ + default: \ + __gu_err = -EINVAL; \ + } \ + __gu_err; \ +}) + +#define __put_user_asm_8(__gu_ptr, __gu_val, __gu_err) \ +({ \ +__asm__ __volatile__ (" lwi %0, %1, 0; \ + 1: swi %0, %2, 0; \ + lwi %0, %1, 4; \ + 2: swi %0, %2, 4; \ + addk %0,r0,r0; \ + 3: \ + .section .fixup,\"ax\"; \ + 4: brid 3b; \ + addik %0, r0, %3; \ + .previous; \ + .section __ex_table,\"a\"; \ + .word 1b,4b,2b,4b; \ + .previous;" \ + : "=&r"(__gu_err) \ + : "r"(&__gu_val), \ + "r"(__gu_ptr), "i"(-EFAULT) \ + ); \ +}) + +#define __put_user_asm(insn, __gu_ptr, __gu_val, __gu_err) \ +({ \ + __asm__ __volatile__ ( \ + "1:" insn " %1, %2, r0; \ + addk %0, r0, r0; \ + 2: \ + .section .fixup,\"ax\"; \ + 3: brid 2b; \ + addik %0, r0, %3; \ + .previous; \ + .section __ex_table,\"a\"; \ + .word 1b,3b; \ + .previous;" \ + : "=r"(__gu_err) \ + : "r"(__gu_val), "r"(__gu_ptr), "i"(-EFAULT) \ + ); \ +}) + +/* + * Return: number of not copied bytes, i.e. 0 if OK or non-zero if fail. + */ +static inline int clear_user(char *to, int size) +{ + if (size && access_ok(VERIFY_WRITE, to, size)) { + __asm__ __volatile__ (" \ + 1: \ + sb r0, %2, r0; \ + addik %0, %0, -1; \ + bneid %0, 1b; \ + addik %2, %2, 1; \ + 2: \ + .section __ex_table,\"a\"; \ + .word 1b,2b; \ + .section .text;" \ + : "=r"(size) \ + : "0"(size), "r"(to) + ); + } + return size; +} + +extern unsigned long __copy_tofrom_user(void __user *to, + const void __user *from, unsigned long size); + +#define copy_to_user(to, from, n) \ + (access_ok(VERIFY_WRITE, (to), (n)) ? \ + __copy_tofrom_user((void __user *)(to), \ + (__force const void __user *)(from), (n)) \ + : -EFAULT) + +#define __copy_to_user(to, from, n) copy_to_user((to), (from), (n)) +#define __copy_to_user_inatomic(to, from, n) copy_to_user((to), (from), (n)) + +#define copy_from_user(to, from, n) \ + (access_ok(VERIFY_READ, (from), (n)) ? \ + __copy_tofrom_user((__force void __user *)(to), \ + (void __user *)(from), (n)) \ + : -EFAULT) + +#define __copy_from_user(to, from, n) copy_from_user((to), (from), (n)) +#define __copy_from_user_inatomic(to, from, n) \ + copy_from_user((to), (from), (n)) + +extern int __strncpy_user(char *to, const char __user *from, int len); +extern int __strnlen_user(const char __user *sstr, int len); + +#define strncpy_from_user(to, from, len) \ + (access_ok(VERIFY_READ, from, 1) ? \ + __strncpy_user(to, from, len) : -EFAULT) +#define strnlen_user(str, len) \ + (access_ok(VERIFY_READ, str, 1) ? __strnlen_user(str, len) : 0) -extern long strncpy_from_user(char *dst, const char __user *src, long count); -extern long strnlen_user(const char __user *src, long count); -extern long __strncpy_from_user(char *dst, const char __user *src, long count); +#endif /* CONFIG_MMU */ /* * The exception table consists of pairs of addresses: the first is the diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S new file mode 100644 index 000000000000..67f991c14b8a --- /dev/null +++ b/arch/microblaze/lib/uaccess_old.S @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2009 Michal Simek + * Copyright (C) 2009 PetaLogix + * Copyright (C) 2007 LynuxWorks, Inc. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include + +/* + * int __strncpy_user(char *to, char *from, int len); + * + * Returns: + * -EFAULT for an exception + * len if we hit the buffer limit + * bytes copied + */ + + .text +.globl __strncpy_user; +.align 4; +__strncpy_user: + + /* + * r5 - to + * r6 - from + * r7 - len + * r3 - temp count + * r4 - temp val + */ + addik r3,r7,0 /* temp_count = len */ + beqi r3,3f +1: + lbu r4,r6,r0 + sb r4,r5,r0 + + addik r3,r3,-1 + beqi r3,2f /* break on len */ + + addik r5,r5,1 + bneid r4,1b + addik r6,r6,1 /* delay slot */ + addik r3,r3,1 /* undo "temp_count--" */ +2: + rsubk r3,r3,r7 /* temp_count = len - temp_count */ +3: + rtsd r15,8 + nop + + + .section .fixup, "ax" + .align 2 +4: + brid 3b + addik r3,r0, -EFAULT + + .section __ex_table, "a" + .word 1b,4b + +/* + * int __strnlen_user(char __user *str, int maxlen); + * + * Returns: + * 0 on error + * maxlen + 1 if no NUL byte found within maxlen bytes + * size of the string (including NUL byte) + */ + + .text +.globl __strnlen_user; +.align 4; +__strnlen_user: + addik r3,r6,0 + beqi r3,3f +1: + lbu r4,r5,r0 + beqid r4,2f /* break on NUL */ + addik r3,r3,-1 /* delay slot */ + + bneid r3,1b + addik r5,r5,1 /* delay slot */ + + addik r3,r3,-1 /* for break on len */ +2: + rsubk r3,r3,r6 +3: + rtsd r15,8 + nop + + + .section .fixup,"ax" +4: + brid 3b + addk r3,r0,r0 + + .section __ex_table,"a" + .word 1b,4b + +/* + * int __copy_tofrom_user(char *to, char *from, int len) + * Return: + * 0 on success + * number of not copied bytes on error + */ + .text +.globl __copy_tofrom_user; +.align 4; +__copy_tofrom_user: + /* + * r5 - to + * r6 - from + * r7, r3 - count + * r4 - tempval + */ + addik r3,r7,0 + beqi r3,3f +1: + lbu r4,r6,r0 + addik r6,r6,1 +2: + sb r4,r5,r0 + addik r3,r3,-1 + bneid r3,1b + addik r5,r5,1 /* delay slot */ +3: + rtsd r15,8 + nop + + + .section __ex_table,"a" + .word 1b,3b,2b,3b -- cgit v1.2.2 From 4bb73c3de752dc386958be32dc7e1d9fefdcbbf0 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:24 +0200 Subject: microblaze_mmu_v2: Add MMU related exceptions handling Signed-off-by: Michal Simek --- arch/microblaze/kernel/exceptions.c | 45 ++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index 4a8a4064c7ee..0cb64a31e89a 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -21,9 +21,9 @@ #include #include /* For KM CPU var */ -#include -#include -#include +#include +#include +#include #include #define MICROBLAZE_ILL_OPCODE_EXCEPTION 0x02 @@ -31,7 +31,7 @@ #define MICROBLAZE_DBUS_EXCEPTION 0x04 #define MICROBLAZE_DIV_ZERO_EXCEPTION 0x05 #define MICROBLAZE_FPU_EXCEPTION 0x06 -#define MICROBLAZE_PRIVILEG_EXCEPTION 0x07 +#define MICROBLAZE_PRIVILEGED_EXCEPTION 0x07 static DEFINE_SPINLOCK(die_lock); @@ -66,6 +66,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, int fsr, int addr) { +#ifdef CONFIG_MMU + int code; + addr = regs->pc; +#endif + #if 0 printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x ESR=%08x\n", type, user_mode(regs) ? "user" : "kernel", fsr, @@ -74,7 +79,13 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, switch (type & 0x1F) { case MICROBLAZE_ILL_OPCODE_EXCEPTION: - _exception(SIGILL, regs, ILL_ILLOPC, addr); + if (user_mode(regs)) { + printk(KERN_WARNING "Illegal opcode exception in user mode.\n"); + _exception(SIGILL, regs, ILL_ILLOPC, addr); + return; + } + printk(KERN_WARNING "Illegal opcode exception in kernel mode.\n"); + die("opcode exception", regs, SIGBUS); break; case MICROBLAZE_IBUS_EXCEPTION: if (user_mode(regs)) { @@ -95,11 +106,16 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, die("bus exception", regs, SIGBUS); break; case MICROBLAZE_DIV_ZERO_EXCEPTION: - printk(KERN_WARNING "Divide by zero exception\n"); - _exception(SIGILL, regs, ILL_ILLOPC, addr); + if (user_mode(regs)) { + printk(KERN_WARNING "Divide by zero exception in user mode\n"); + _exception(SIGILL, regs, ILL_ILLOPC, addr); + return; + } + printk(KERN_WARNING "Divide by zero exception in kernel mode.\n"); + die("Divide by exception", regs, SIGBUS); break; - case MICROBLAZE_FPU_EXCEPTION: + printk(KERN_WARNING "FPU exception\n"); /* IEEE FP exception */ /* I removed fsr variable and use code var for storing fsr */ if (fsr & FSR_IO) @@ -115,7 +131,20 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, _exception(SIGFPE, regs, fsr, addr); break; +#ifdef CONFIG_MMU + case MICROBLAZE_PRIVILEGED_EXCEPTION: + printk(KERN_WARNING "Privileged exception\n"); + /* "brk r0,r0" - used as debug breakpoint */ + if (get_user(code, (unsigned long *)regs->pc) == 0 + && code == 0x980c0000) { + _exception(SIGTRAP, regs, TRAP_BRKPT, addr); + } else { + _exception(SIGILL, regs, ILL_PRVOPC, addr); + } + break; +#endif default: + /* FIXME what to do in unexpected exception */ printk(KERN_WARNING "Unexpected exception %02x " "PC=%08x in %s mode\n", type, (unsigned int) addr, kernel_mode(regs) ? "kernel" : "user"); -- cgit v1.2.2 From d4c1285ef0d6b005f4e7651ee1e7cf304b4e97dc Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:24 +0200 Subject: microblaze_mmu_v2: Update linker script for MMU Signed-off-by: Michal Simek --- arch/microblaze/kernel/vmlinux.lds.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index 910f7c09b92a..8ae807ab7a51 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -17,8 +17,7 @@ ENTRY(_start) jiffies = jiffies_64 + 4; SECTIONS { - . = CONFIG_KERNEL_BASE_ADDR; - + . = CONFIG_KERNEL_START; .text : { _text = . ; _stext = . ; -- cgit v1.2.2 From 65504a47e02e4e6e58884376f4a700f83cc8234f Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:26 +0200 Subject: microblaze_mmu_v2: Enable fork syscall for MMU and add fork as vfork for noMMU Signed-off-by: Michal Simek --- arch/microblaze/kernel/entry-nommu.S | 2 +- arch/microblaze/kernel/syscall_table.S | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/entry-nommu.S b/arch/microblaze/kernel/entry-nommu.S index f24b1268baaf..1fce6b803f54 100644 --- a/arch/microblaze/kernel/entry-nommu.S +++ b/arch/microblaze/kernel/entry-nommu.S @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index 3bb42ec924c2..376d1789f7c0 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -2,7 +2,11 @@ ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, * used for restarting */ .long sys_exit - .long sys_ni_syscall /* was fork */ +#ifdef CONFIG_MMU + .long sys_fork_wrapper +#else + .long sys_ni_syscall +#endif .long sys_read .long sys_write .long sys_open /* 5 */ -- cgit v1.2.2 From 17f3324c3eb271882b9e6b8fc3b1698290121801 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:27 +0200 Subject: microblaze_mmu_v2: Traps MMU update Signed-off-by: Michal Simek --- arch/microblaze/include/asm/exceptions.h | 24 ++++++++-------------- arch/microblaze/kernel/traps.c | 34 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/exceptions.h b/arch/microblaze/include/asm/exceptions.h index 24ca540e77c0..90731df9e574 100644 --- a/arch/microblaze/include/asm/exceptions.h +++ b/arch/microblaze/include/asm/exceptions.h @@ -1,8 +1,8 @@ /* * Preliminary support for HW exception handing for Microblaze * - * Copyright (C) 2008 Michal Simek - * Copyright (C) 2008 PetaLogix + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2005 John Williams * * This file is subject to the terms and conditions of the GNU General @@ -64,21 +64,13 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, void die(const char *str, struct pt_regs *fp, long err); void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr); -#if defined(CONFIG_XMON) -extern void xmon(struct pt_regs *regs); -extern int xmon_bpt(struct pt_regs *regs); -extern int xmon_sstep(struct pt_regs *regs); -extern int xmon_iabr_match(struct pt_regs *regs); -extern int xmon_dabr_match(struct pt_regs *regs); -extern void (*xmon_fault_handler)(struct pt_regs *regs); +#ifdef CONFIG_MMU +void __bug(const char *file, int line, void *data); +int bad_trap(int trap_num, struct pt_regs *regs); +int debug_trap(struct pt_regs *regs); +#endif /* CONFIG_MMU */ -void (*debugger)(struct pt_regs *regs) = xmon; -int (*debugger_bpt)(struct pt_regs *regs) = xmon_bpt; -int (*debugger_sstep)(struct pt_regs *regs) = xmon_sstep; -int (*debugger_iabr_match)(struct pt_regs *regs) = xmon_iabr_match; -int (*debugger_dabr_match)(struct pt_regs *regs) = xmon_dabr_match; -void (*debugger_fault_handler)(struct pt_regs *regs); -#elif defined(CONFIG_KGDB) +#if defined(CONFIG_KGDB) void (*debugger)(struct pt_regs *regs); int (*debugger_bpt)(struct pt_regs *regs); int (*debugger_sstep)(struct pt_regs *regs); diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c index 6322cc174474..eaaaf805f31b 100644 --- a/arch/microblaze/kernel/traps.c +++ b/arch/microblaze/kernel/traps.c @@ -97,3 +97,37 @@ void dump_stack(void) show_stack(NULL, NULL); } EXPORT_SYMBOL(dump_stack); + +#ifdef CONFIG_MMU +void __bug(const char *file, int line, void *data) +{ + if (data) + printk(KERN_CRIT "kernel BUG at %s:%d (data = %p)!\n", + file, line, data); + else + printk(KERN_CRIT "kernel BUG at %s:%d!\n", file, line); + + machine_halt(); +} + +int bad_trap(int trap_num, struct pt_regs *regs) +{ + printk(KERN_CRIT + "unimplemented trap %d called at 0x%08lx, pid %d!\n", + trap_num, regs->pc, current->pid); + return -ENOSYS; +} + +int debug_trap(struct pt_regs *regs) +{ + int i; + printk(KERN_CRIT "debug trap\n"); + for (i = 0; i < 32; i++) { + /* printk("r%i:%08X\t",i,regs->gpr[i]); */ + if ((i % 4) == 3) + printk(KERN_CRIT "\n"); + } + printk(KERN_CRIT "pc:%08lX\tmsr:%08lX\n", regs->pc, regs->msr); + return -ENOSYS; +} +#endif -- cgit v1.2.2 From 8b28626a6b1522b39f75d0bf80d5dec23c931f5a Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:28 +0200 Subject: microblaze_mmu_v2: Update signal returning address Signed-off-by: Michal Simek --- arch/microblaze/kernel/signal.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 3cdcbfe41295..4c0e6521b114 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -388,7 +388,15 @@ handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler) case -ERESTARTNOINTR: do_restart: /* offset of 4 bytes to re-execute trap (brki) instruction */ +#ifndef CONFIG_MMU regs->pc -= 4; +#else + /* offset of 8 bytes required = 4 for rtbd + offset, plus 4 for size of + "brki r14,8" + instruction. */ + regs->pc -= 8; +#endif break; } } -- cgit v1.2.2 From 46fb9be93b15bd8315622ad2f85f9516c064a785 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:28 +0200 Subject: microblaze_mmu_v2: Update cacheflush.h Signed-off-by: Michal Simek --- arch/microblaze/include/asm/cacheflush.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/cacheflush.h b/arch/microblaze/include/asm/cacheflush.h index 3300b785049b..f989d6aad648 100644 --- a/arch/microblaze/include/asm/cacheflush.h +++ b/arch/microblaze/include/asm/cacheflush.h @@ -1,5 +1,6 @@ /* - * Copyright (C) 2007 PetaLogix + * Copyright (C) 2007-2009 Michal Simek + * Copyright (C) 2007-2009 PetaLogix * Copyright (C) 2007 John Williams * based on v850 version which was * Copyright (C) 2001,02,03 NEC Electronics Corporation @@ -43,6 +44,23 @@ #define flush_icache_range(start, len) __invalidate_icache_range(start, len) #define flush_icache_page(vma, pg) do { } while (0) +#ifndef CONFIG_MMU +# define flush_icache_user_range(start, len) do { } while (0) +#else +# define flush_icache_user_range(vma, pg, adr, len) __invalidate_icache_all() + +# define flush_page_to_ram(page) do { } while (0) + +# define flush_icache() __invalidate_icache_all() +# define flush_cache_sigtramp(vaddr) \ + __invalidate_icache_range(vaddr, vaddr + 8) + +# define flush_dcache_mmap_lock(mapping) do { } while (0) +# define flush_dcache_mmap_unlock(mapping) do { } while (0) + +# define flush_cache_dup_mm(mm) do { } while (0) +#endif + #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) -- cgit v1.2.2 From dcffc1b29115cc26686b8ae62b587e63f0e139f0 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:29 +0200 Subject: microblaze_mmu_v2: Update dma.h for MMU Signed-off-by: Michal Simek --- arch/microblaze/include/asm/dma.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/include/asm/dma.h b/arch/microblaze/include/asm/dma.h index 0967fa04fc5e..08c073badf19 100644 --- a/arch/microblaze/include/asm/dma.h +++ b/arch/microblaze/include/asm/dma.h @@ -9,8 +9,13 @@ #ifndef _ASM_MICROBLAZE_DMA_H #define _ASM_MICROBLAZE_DMA_H +#ifndef CONFIG_MMU /* we don't have dma address limit. define it as zero to be * unlimited. */ #define MAX_DMA_ADDRESS (0) +#else +/* Virtual address corresponding to last available physical memory address. */ +#define MAX_DMA_ADDRESS (CONFIG_KERNEL_START + memory_size - 1) +#endif #endif /* _ASM_MICROBLAZE_DMA_H */ -- cgit v1.2.2 From e57a221f15fd62921d845ddb37ce048fc7a38fd6 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:30 +0200 Subject: microblaze_mmu_v2: Elf update Signed-off-by: Michal Simek --- arch/microblaze/include/asm/elf.h | 93 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/include/asm/elf.h b/arch/microblaze/include/asm/elf.h index 81337f241347..f92fc0dda006 100644 --- a/arch/microblaze/include/asm/elf.h +++ b/arch/microblaze/include/asm/elf.h @@ -1,4 +1,6 @@ /* + * Copyright (C) 2008-2009 Michal Simek + * Copyright (C) 2008-2009 PetaLogix * Copyright (C) 2006 Atmark Techno, Inc. * * This file is subject to the terms and conditions of the GNU General Public @@ -27,4 +29,95 @@ */ #define ELF_CLASS ELFCLASS32 +#ifndef __uClinux__ + +/* + * ELF register definitions.. + */ + +#include +#include + +#ifndef ELF_GREG_T +#define ELF_GREG_T +typedef unsigned long elf_greg_t; +#endif + +#ifndef ELF_NGREG +#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t)) +#endif + +#ifndef ELF_GREGSET_T +#define ELF_GREGSET_T +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; +#endif + +#ifndef ELF_FPREGSET_T +#define ELF_FPREGSET_T + +/* TBD */ +#define ELF_NFPREG 33 /* includes fsr */ +typedef unsigned long elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +/* typedef struct user_fpu_struct elf_fpregset_t; */ +#endif + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + * use of this is to invoke "./ld.so someprog" to test out a new version of + * the loader. We need to make sure that it is out of the way of the program + * that it will "exec", and that there is sufficient room for the brk. + */ + +#define ELF_ET_DYN_BASE (0x08000000) + +#ifdef __LITTLE_ENDIAN__ +#define ELF_DATA ELFDATA2LSB +#else +#define ELF_DATA ELFDATA2MSB +#endif + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + + +#define ELF_CORE_COPY_REGS(_dest, _regs) \ + memcpy((char *) &_dest, (char *) _regs, \ + sizeof(struct pt_regs)); + +/* This yields a mask that user programs can use to figure out what + * instruction set this CPU supports. This could be done in user space, + * but it's not easy, and we've already done it here. + */ +#define ELF_HWCAP (0) + +/* This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. + + * For the moment, we have only optimizations for the Intel generations, + * but that could change... + */ +#define ELF_PLATFORM (NULL) + +/* Added _f parameter. Is this definition correct: TBD */ +#define ELF_PLAT_INIT(_r, _f) \ +do { \ + _r->r1 = _r->r1 = _r->r2 = _r->r3 = \ + _r->r4 = _r->r5 = _r->r6 = _r->r7 = \ + _r->r8 = _r->r9 = _r->r10 = _r->r11 = \ + _r->r12 = _r->r13 = _r->r14 = _r->r15 = \ + _r->r16 = _r->r17 = _r->r18 = _r->r19 = \ + _r->r20 = _r->r21 = _r->r22 = _r->r23 = \ + _r->r24 = _r->r25 = _r->r26 = _r->r27 = \ + _r->r28 = _r->r29 = _r->r30 = _r->r31 = \ + 0; \ +} while (0) + +#ifdef __KERNEL__ +#define SET_PERSONALITY(ex) set_personality(PER_LINUX_32BIT) +#endif + +#endif /* __uClinux__ */ + #endif /* _ASM_MICROBLAZE_ELF_H */ -- cgit v1.2.2 From 8b3f7d5c0ba7600222744588126388b225c61f18 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:31 +0200 Subject: microblaze_mmu_v2: stat.h MMU update Signed-off-by: Michal Simek --- arch/microblaze/include/asm/stat.h | 77 ++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/stat.h b/arch/microblaze/include/asm/stat.h index 5f18b8aed220..a15f77520bfd 100644 --- a/arch/microblaze/include/asm/stat.h +++ b/arch/microblaze/include/asm/stat.h @@ -16,58 +16,53 @@ #include +#define STAT_HAVE_NSEC 1 + struct stat { - unsigned int st_dev; + unsigned long st_dev; unsigned long st_ino; unsigned int st_mode; unsigned int st_nlink; unsigned int st_uid; unsigned int st_gid; - unsigned int st_rdev; - unsigned long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - unsigned long st_atime; - unsigned long __unused1; /* unsigned long st_atime_nsec */ - unsigned long st_mtime; - unsigned long __unused2; /* unsigned long st_mtime_nsec */ - unsigned long st_ctime; - unsigned long __unused3; /* unsigned long st_ctime_nsec */ + unsigned long st_rdev; + unsigned long __pad1; + long st_size; + int st_blksize; + int __pad2; + long st_blocks; + int st_atime; + unsigned int st_atime_nsec; + int st_mtime; + unsigned int st_mtime_nsec; + int st_ctime; + unsigned int st_ctime_nsec; unsigned long __unused4; unsigned long __unused5; }; struct stat64 { - unsigned long long st_dev; - unsigned long __unused1; - - unsigned long long st_ino; - - unsigned int st_mode; - unsigned int st_nlink; - - unsigned int st_uid; - unsigned int st_gid; - - unsigned long long st_rdev; - unsigned long __unused3; - - long long st_size; - unsigned long st_blksize; - - unsigned long st_blocks; /* No. of 512-byte blocks allocated */ - unsigned long __unused4; /* future possible st_blocks high bits */ - - unsigned long st_atime; - unsigned long st_atime_nsec; - - unsigned long st_mtime; - unsigned long st_mtime_nsec; - - unsigned long st_ctime; - unsigned long st_ctime_nsec; - - unsigned long __unused8; + unsigned long long st_dev; /* Device. */ + unsigned long long st_ino; /* File serial number. */ + unsigned int st_mode; /* File mode. */ + unsigned int st_nlink; /* Link count. */ + unsigned int st_uid; /* User ID of the file's owner. */ + unsigned int st_gid; /* Group ID of the file's group. */ + unsigned long long st_rdev; /* Device number, if device. */ + unsigned long long __pad1; + long long st_size; /* Size of file, in bytes. */ + int st_blksize; /* Optimal block size for I/O. */ + int __pad2; + long long st_blocks; /* Number 512-byte blocks allocated. */ + int st_atime; /* Time of last access. */ + unsigned int st_atime_nsec; + int st_mtime; /* Time of last modification. */ + unsigned int st_mtime_nsec; + int st_ctime; /* Time of last status change. */ + unsigned int st_ctime_nsec; + unsigned int __unused4; + unsigned int __unused5; }; #endif /* _ASM_MICROBLAZE_STAT_H */ + -- cgit v1.2.2 From a116f6d5db4476d0c941b495a6122b0130bbf20b Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:31 +0200 Subject: microblaze_mmu_v2: Kconfig update Signed-off-by: Michal Simek --- arch/microblaze/Kconfig | 112 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index f8e0722df0cf..b50b845fdd50 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -54,11 +54,9 @@ config GENERIC_GPIO def_bool y config PCI - depends on !MMU def_bool n config NO_DMA - depends on !MMU def_bool y source "init/Kconfig" @@ -76,7 +74,8 @@ source "kernel/Kconfig.preempt" source "kernel/Kconfig.hz" config MMU - def_bool n + bool "MMU support" + default n config NO_MMU bool @@ -119,6 +118,113 @@ config PROC_DEVICETREE endmenu +menu "Advanced setup" + +config ADVANCED_OPTIONS + bool "Prompt for advanced kernel configuration options" + depends on MMU + help + This option will enable prompting for a variety of advanced kernel + configuration options. These options can cause the kernel to not + work if they are set incorrectly, but can be used to optimize certain + aspects of kernel memory management. + + Unless you know what you are doing, say N here. + +comment "Default settings for advanced configuration options are used" + depends on !ADVANCED_OPTIONS + +config HIGHMEM_START_BOOL + bool "Set high memory pool address" + depends on ADVANCED_OPTIONS && HIGHMEM + help + This option allows you to set the base address of the kernel virtual + area used to map high memory pages. This can be useful in + optimizing the layout of kernel virtual memory. + + Say N here unless you know what you are doing. + +config HIGHMEM_START + hex "Virtual start address of high memory pool" if HIGHMEM_START_BOOL + depends on MMU + default "0xfe000000" + +config LOWMEM_SIZE_BOOL + bool "Set maximum low memory" + depends on ADVANCED_OPTIONS + help + This option allows you to set the maximum amount of memory which + will be used as "low memory", that is, memory which the kernel can + access directly, without having to set up a kernel virtual mapping. + This can be useful in optimizing the layout of kernel virtual + memory. + + Say N here unless you know what you are doing. + +config LOWMEM_SIZE + hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL + depends on MMU + default "0x30000000" + +config KERNEL_START_BOOL + bool "Set custom kernel base address" + depends on ADVANCED_OPTIONS + help + This option allows you to set the kernel virtual address at which + the kernel will map low memory (the kernel image will be linked at + this address). This can be useful in optimizing the virtual memory + layout of the system. + + Say N here unless you know what you are doing. + +config KERNEL_START + hex "Virtual address of kernel base" if KERNEL_START_BOOL + default "0xc0000000" if MMU + default KERNEL_BASE_ADDR if !MMU + +config TASK_SIZE_BOOL + bool "Set custom user task size" + depends on ADVANCED_OPTIONS + help + This option allows you to set the amount of virtual address space + allocated to user tasks. This can be useful in optimizing the + virtual memory layout of the system. + + Say N here unless you know what you are doing. + +config TASK_SIZE + hex "Size of user task space" if TASK_SIZE_BOOL + depends on MMU + default "0x80000000" + +config CONSISTENT_START_BOOL + bool "Set custom consistent memory pool address" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the base virtual address + of the the consistent memory pool. This pool of virtual + memory is used to make consistent memory allocations. + +config CONSISTENT_START + hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL + depends on MMU + default "0xff100000" if NOT_COHERENT_CACHE + +config CONSISTENT_SIZE_BOOL + bool "Set custom consistent memory pool size" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the size of the the + consistent memory pool. This pool of virtual memory + is used to make consistent memory allocations. + +config CONSISTENT_SIZE + hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL + depends on MMU + default "0x00200000" if NOT_COHERENT_CACHE + +endmenu + source "mm/Kconfig" menu "Exectuable file formats" -- cgit v1.2.2 From 4b2368ffd6e3563b73b8391c133822a58b1b432a Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 26 May 2009 16:30:32 +0200 Subject: microblaze_mmu_v2: Makefiles Signed-off-by: Michal Simek --- arch/microblaze/Makefile | 4 ++++ arch/microblaze/boot/Makefile | 2 ++ arch/microblaze/kernel/Makefile | 1 + arch/microblaze/lib/Makefile | 3 ++- arch/microblaze/mm/Makefile | 2 ++ 5 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index ab731b75c23b..d0bcf80a1136 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -1,4 +1,8 @@ +ifeq ($(CONFIG_MMU),y) +UTS_SYSNAME = -DUTS_SYSNAME=\"Linux\" +else UTS_SYSNAME = -DUTS_SYSNAME=\"uClinux\" +endif # What CPU vesion are we building for, and crack it open # as major.minor.rev diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile index 844edf406d34..c2bb043a029d 100644 --- a/arch/microblaze/boot/Makefile +++ b/arch/microblaze/boot/Makefile @@ -7,6 +7,8 @@ targets := linux.bin linux.bin.gz OBJCOPYFLAGS_linux.bin := -O binary $(obj)/linux.bin: vmlinux FORCE + [ -n $(CONFIG_INITRAMFS_SOURCE) ] && [ ! -e $(CONFIG_INITRAMFS_SOURCE) ] && \ + touch $(CONFIG_INITRAMFS_SOURCE) || echo "No CPIO image" $(call if_changed,objcopy) @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile index da94bec4ecba..f4a5e19a20eb 100644 --- a/arch/microblaze/kernel/Makefile +++ b/arch/microblaze/kernel/Makefile @@ -15,5 +15,6 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_SELFMOD) += selfmod.o obj-$(CONFIG_HEART_BEAT) += heartbeat.o obj-$(CONFIG_MODULES) += microblaze_ksyms.o module.o +obj-$(CONFIG_MMU) += misc.o obj-y += entry$(MMUEXT).o diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile index d27126bf306a..71c8cb6c9e43 100644 --- a/arch/microblaze/lib/Makefile +++ b/arch/microblaze/lib/Makefile @@ -10,4 +10,5 @@ else lib-y += memcpy.o memmove.o endif -lib-y += uaccess.o +lib-$(CONFIG_NO_MMU) += uaccess.o +lib-$(CONFIG_MMU) += uaccess_old.o diff --git a/arch/microblaze/mm/Makefile b/arch/microblaze/mm/Makefile index bf9e4479a1fd..6c8a924d9e26 100644 --- a/arch/microblaze/mm/Makefile +++ b/arch/microblaze/mm/Makefile @@ -3,3 +3,5 @@ # obj-y := init.o + +obj-$(CONFIG_MMU) += pgtable.o mmu_context.o fault.o -- cgit v1.2.2 From 13503fa9137d9708d52214e9506c671dbf2fbdce Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Mar 2009 17:39:20 +0900 Subject: x86, mce: Cleanup param parser - Fix the comment formatting. - The error path does not return 0, and printk lacks level and "\n". - Move __setup("nomce") next to mcheck_disable(). - Improve readability etc. [ Impact: cleanup ] Signed-off-by: Hidetoshi Seto Acked-by: Andi Kleen LKML-Reference: <49CB3F38.7090703@jp.fujitsu.com> Signed-off-by: Ingo Molnar Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_64.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 6fb0b359d2a5..77effb55afe7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -839,25 +839,29 @@ static int __init mcheck_disable(char *str) mce_dont_init = 1; return 1; } +__setup("nomce", mcheck_disable); -/* mce=off disables machine check. - mce=TOLERANCELEVEL (number, see above) - mce=bootlog Log MCEs from before booting. Disabled by default on AMD. - mce=nobootlog Don't log MCEs from before booting. */ +/* + * mce=off disables machine check + * mce=TOLERANCELEVEL (number, see above) + * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. + * mce=nobootlog Don't log MCEs from before booting. + */ static int __init mcheck_enable(char *str) { if (!strcmp(str, "off")) mce_dont_init = 1; - else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) - mce_bootlog = str[0] == 'b'; + else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) + mce_bootlog = (str[0] == 'b'); else if (isdigit(str[0])) get_option(&str, &tolerant); - else - printk("mce= argument %s ignored. Please use /sys", str); + else { + printk(KERN_INFO "mce= argument %s ignored. Please use /sys\n", + str); + return 0; + } return 1; } - -__setup("nomce", mcheck_disable); __setup("mce=", mcheck_enable); /* -- cgit v1.2.2 From e9eee03e99d519599eb615c3e251d5f6cc4be57d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:17 +0200 Subject: x86, mce: clean up mce_64.c This file has been modified many times along the years, by multiple authors, so the general style and structure has diverged in a number of areas making this file hard to read. So fix the coding style match that of the rest of the x86 arch code. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_64.c | 247 ++++++++++++++++++++++-------------- 1 file changed, 149 insertions(+), 98 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 77effb55afe7..1491246c4d69 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -1,46 +1,47 @@ /* * Machine check handler. + * * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. * Rest from unknown author(s). * 2004 Andi Kleen. Rewrote most of it. * Copyright 2008 Intel Corporation * Author: Andi Kleen */ - -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include +#include #include -#include +#include +#include +#include +#include +#include +#include + #include -#include -#include #include -#include #include +#include +#include +#include -#define MISC_MCELOG_MINOR 227 +#define MISC_MCELOG_MINOR 227 atomic_t mce_entry; -static int mce_dont_init; +static int mce_dont_init; /* * Tolerant levels: @@ -49,16 +50,16 @@ static int mce_dont_init; * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors * 3: never panic or SIGBUS, log all errors (for testing only) */ -static int tolerant = 1; -static int banks; -static u64 *bank; -static unsigned long notify_user; -static int rip_msr; -static int mce_bootlog = -1; -static atomic_t mce_events; +static int tolerant = 1; +static int banks; +static u64 *bank; +static unsigned long notify_user; +static int rip_msr; +static int mce_bootlog = -1; +static atomic_t mce_events; -static char trigger[128]; -static char *trigger_argv[2] = { trigger, NULL }; +static char trigger[128]; +static char *trigger_argv[2] = { trigger, NULL }; static DECLARE_WAIT_QUEUE_HEAD(mce_wait); @@ -89,19 +90,23 @@ static struct mce_log mcelog = { void mce_log(struct mce *mce) { unsigned next, entry; + atomic_inc(&mce_events); mce->finished = 0; wmb(); for (;;) { entry = rcu_dereference(mcelog.next); for (;;) { - /* When the buffer fills up discard new entries. Assume - that the earlier errors are the more interesting. */ + /* + * When the buffer fills up discard new entries. + * Assume that the earlier errors are the more + * interesting ones: + */ if (entry >= MCE_LOG_LEN) { set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); return; } - /* Old left over entry. Skip. */ + /* Old left over entry. Skip: */ if (mcelog.entry[entry].finished) { entry++; continue; @@ -264,12 +269,12 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) * implies that most kernel services cannot be safely used. Don't even * think about putting a printk in there! */ -void do_machine_check(struct pt_regs * regs, long error_code) +void do_machine_check(struct pt_regs *regs, long error_code) { struct mce m, panicm; + int panicm_found = 0; u64 mcestart = 0; int i; - int panicm_found = 0; /* * If no_way_out gets set, there is no safe way to recover from this * MCE. If tolerant is cranked up, we'll try anyway. @@ -293,6 +298,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) mce_setup(&m); rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + /* if the restart IP is not valid, we're done for */ if (!(m.mcgstatus & MCG_STATUS_RIPV)) no_way_out = 1; @@ -356,23 +362,29 @@ void do_machine_check(struct pt_regs * regs, long error_code) mce_get_rip(&m, regs); mce_log(&m); - /* Did this bank cause the exception? */ - /* Assume that the bank with uncorrectable errors did it, - and that there is only a single one. */ - if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) { + /* + * Did this bank cause the exception? + * + * Assume that the bank with uncorrectable errors did it, + * and that there is only a single one: + */ + if ((m.status & MCI_STATUS_UC) && + (m.status & MCI_STATUS_EN)) { panicm = m; panicm_found = 1; } } - /* If we didn't find an uncorrectable error, pick - the last one (shouldn't happen, just being safe). */ + /* + * If we didn't find an uncorrectable error, pick + * the last one (shouldn't happen, just being safe). + */ if (!panicm_found) panicm = m; /* * If we have decided that we just CAN'T continue, and the user - * has not set tolerant to an insane level, give up and die. + * has not set tolerant to an insane level, give up and die. */ if (no_way_out && tolerant < 3) mce_panic("Machine check", &panicm, mcestart); @@ -451,10 +463,9 @@ void mce_log_therm_throt_event(__u64 status) * poller finds an MCE, poll 2x faster. When the poller finds no more * errors, poll 2x slower (up to check_interval seconds). */ - static int check_interval = 5 * 60; /* 5 minutes */ + static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ -static void mcheck_timer(unsigned long); static DEFINE_PER_CPU(struct timer_list, mce_timer); static void mcheck_timer(unsigned long data) @@ -464,9 +475,10 @@ static void mcheck_timer(unsigned long data) WARN_ON(smp_processor_id() != data); - if (mce_available(¤t_cpu_data)) + if (mce_available(¤t_cpu_data)) { machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_poll_banks)); + } /* * Alert userspace if needed. If we logged an MCE, reduce the @@ -501,6 +513,7 @@ int mce_notify_user(void) static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); clear_thread_flag(TIF_MCE_NOTIFY); + if (test_and_clear_bit(0, ¬ify_user)) { wake_up_interruptible(&mce_wait); @@ -520,9 +533,10 @@ int mce_notify_user(void) return 0; } -/* see if the idle task needs to notify userspace */ +/* see if the idle task needs to notify userspace: */ static int -mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk) +mce_idle_callback(struct notifier_block *nfb, unsigned long action, + void *unused) { /* IDLE_END should be safe - interrupts are back on */ if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) @@ -532,7 +546,7 @@ mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk) } static struct notifier_block mce_idle_notifier = { - .notifier_call = mce_idle_callback, + .notifier_call = mce_idle_callback, }; static __init int periodic_mcheck_init(void) @@ -547,8 +561,8 @@ __initcall(periodic_mcheck_init); */ static int mce_cap_init(void) { - u64 cap; unsigned b; + u64 cap; rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & 0xff; @@ -578,9 +592,9 @@ static int mce_cap_init(void) static void mce_init(void *dummy) { + mce_banks_t all_banks; u64 cap; int i; - mce_banks_t all_banks; /* * Log the machine checks left over from the previous reset. @@ -605,14 +619,21 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) { /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && banks > 4) - /* disable GART TBL walk error reporting, which trips off - incorrectly with the IOMMU & 3ware & Cerberus. */ + if (c->x86 == 15 && banks > 4) { + /* + * disable GART TBL walk error reporting, which + * trips off incorrectly with the IOMMU & 3ware + * & Cerberus: + */ clear_bit(10, (unsigned long *)&bank[4]); - if(c->x86 <= 17 && mce_bootlog < 0) - /* Lots of broken BIOS around that don't clear them - by default and leave crap in there. Don't log. */ + } + if (c->x86 <= 17 && mce_bootlog < 0) { + /* + * Lots of broken BIOS around that don't clear them + * by default and leave crap in there. Don't log: + */ mce_bootlog = 0; + } } } @@ -646,7 +667,7 @@ static void mce_init_timer(void) /* * Called for each booted CPU to set up machine checks. - * Must be called with preempt off. + * Must be called with preempt off: */ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) { @@ -669,8 +690,8 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) */ static DEFINE_SPINLOCK(mce_state_lock); -static int open_count; /* #times opened */ -static int open_exclu; /* already open exclusive? */ +static int open_count; /* #times opened */ +static int open_exclu; /* already open exclusive? */ static int mce_open(struct inode *inode, struct file *file) { @@ -680,6 +701,7 @@ static int mce_open(struct inode *inode, struct file *file) if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { spin_unlock(&mce_state_lock); unlock_kernel(); + return -EBUSY; } @@ -712,13 +734,14 @@ static void collect_tscs(void *data) rdtscll(cpu_tsc[smp_processor_id()]); } +static DEFINE_MUTEX(mce_read_mutex); + static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off) { + char __user *buf = ubuf; unsigned long *cpu_tsc; - static DEFINE_MUTEX(mce_read_mutex); unsigned prev, next; - char __user *buf = ubuf; int i, err; cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); @@ -732,6 +755,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { mutex_unlock(&mce_read_mutex); kfree(cpu_tsc); + return -EINVAL; } @@ -770,6 +794,7 @@ timeout: * synchronize. */ on_each_cpu(collect_tscs, cpu_tsc, 1); + for (i = next; i < MCE_LOG_LEN; i++) { if (mcelog.entry[i].finished && mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { @@ -782,6 +807,7 @@ timeout: } mutex_unlock(&mce_read_mutex); kfree(cpu_tsc); + return err ? -EFAULT : buf - ubuf; } @@ -799,6 +825,7 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + switch (cmd) { case MCE_GET_RECORD_LEN: return put_user(sizeof(struct mce), p); @@ -810,6 +837,7 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) do { flags = mcelog.flags; } while (cmpxchg(&mcelog.flags, flags, 0) != flags); + return put_user(flags, p); } default: @@ -818,11 +846,11 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) } static const struct file_operations mce_chrdev_ops = { - .open = mce_open, - .release = mce_release, - .read = mce_read, - .poll = mce_poll, - .unlocked_ioctl = mce_ioctl, + .open = mce_open, + .release = mce_release, + .read = mce_read, + .poll = mce_poll, + .unlocked_ioctl = mce_ioctl, }; static struct miscdevice mce_log_device = { @@ -891,13 +919,16 @@ static int mce_shutdown(struct sys_device *dev) return mce_disable(); } -/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. - Only one CPU is active at this time, the others get readded later using - CPU hotplug. */ +/* + * On resume clear all MCE state. Don't want to see leftovers from the BIOS. + * Only one CPU is active at this time, the others get re-added later using + * CPU hotplug: + */ static int mce_resume(struct sys_device *dev) { mce_init(NULL); mce_cpu_features(¤t_cpu_data); + return 0; } @@ -916,14 +947,16 @@ static void mce_restart(void) } static struct sysdev_class mce_sysclass = { - .suspend = mce_suspend, - .shutdown = mce_shutdown, - .resume = mce_resume, - .name = "machinecheck", + .suspend = mce_suspend, + .shutdown = mce_shutdown, + .resume = mce_resume, + .name = "machinecheck", }; DEFINE_PER_CPU(struct sys_device, device_mce); -void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; + +__cpuinitdata +void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); /* Why are there no generic functions for this? */ #define ACCESSOR(name, var, start) \ @@ -937,9 +970,12 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit const char *buf, size_t siz) { \ char *end; \ unsigned long new = simple_strtoul(buf, &end, 0); \ - if (end == buf) return -EINVAL; \ + \ + if (end == buf) \ + return -EINVAL; \ var = new; \ start; \ + \ return end-buf; \ } \ static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); @@ -950,6 +986,7 @@ static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, char *buf) { u64 b = bank[attr - bank_attrs]; + return sprintf(buf, "%llx\n", b); } @@ -958,15 +995,18 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, { char *end; u64 new = simple_strtoull(buf, &end, 0); + if (end == buf) return -EINVAL; + bank[attr - bank_attrs] = new; mce_restart(); + return end-buf; } -static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, - char *buf) +static ssize_t +show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) { strcpy(buf, trigger); strcat(buf, "\n"); @@ -974,21 +1014,27 @@ static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, } static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, - const char *buf,size_t siz) + const char *buf, size_t siz) { char *p; int len; + strncpy(trigger, buf, sizeof(trigger)); trigger[sizeof(trigger)-1] = 0; len = strlen(trigger); p = strchr(trigger, '\n'); - if (*p) *p = 0; + + if (*p) + *p = 0; + return len; } static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); -ACCESSOR(check_interval,check_interval,mce_restart()) + +ACCESSOR(check_interval, check_interval, mce_restart()) + static struct sysdev_attribute *mce_attributes[] = { &attr_tolerant.attr, &attr_check_interval, &attr_trigger, NULL @@ -996,7 +1042,7 @@ static struct sysdev_attribute *mce_attributes[] = { static cpumask_var_t mce_device_initialized; -/* Per cpu sysdev init. All of the cpus still share the same ctl bank */ +/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ static __cpuinit int mce_create_device(unsigned int cpu) { int err; @@ -1006,15 +1052,15 @@ static __cpuinit int mce_create_device(unsigned int cpu) return -EIO; memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); - per_cpu(device_mce,cpu).id = cpu; - per_cpu(device_mce,cpu).cls = &mce_sysclass; + per_cpu(device_mce, cpu).id = cpu; + per_cpu(device_mce, cpu).cls = &mce_sysclass; - err = sysdev_register(&per_cpu(device_mce,cpu)); + err = sysdev_register(&per_cpu(device_mce, cpu)); if (err) return err; for (i = 0; mce_attributes[i]; i++) { - err = sysdev_create_file(&per_cpu(device_mce,cpu), + err = sysdev_create_file(&per_cpu(device_mce, cpu), mce_attributes[i]); if (err) goto error; @@ -1035,10 +1081,10 @@ error2: } error: while (--i >= 0) { - sysdev_remove_file(&per_cpu(device_mce,cpu), + sysdev_remove_file(&per_cpu(device_mce, cpu), mce_attributes[i]); } - sysdev_unregister(&per_cpu(device_mce,cpu)); + sysdev_unregister(&per_cpu(device_mce, cpu)); return err; } @@ -1051,12 +1097,12 @@ static __cpuinit void mce_remove_device(unsigned int cpu) return; for (i = 0; mce_attributes[i]; i++) - sysdev_remove_file(&per_cpu(device_mce,cpu), + sysdev_remove_file(&per_cpu(device_mce, cpu), mce_attributes[i]); for (i = 0; i < banks; i++) sysdev_remove_file(&per_cpu(device_mce, cpu), &bank_attrs[i]); - sysdev_unregister(&per_cpu(device_mce,cpu)); + sysdev_unregister(&per_cpu(device_mce, cpu)); cpumask_clear_cpu(cpu, mce_device_initialized); } @@ -1076,11 +1122,12 @@ static void mce_disable_cpu(void *h) static void mce_reenable_cpu(void *h) { - int i; unsigned long action = *(unsigned long *)h; + int i; if (!mce_available(¤t_cpu_data)) return; + if (!(action & CPU_TASKS_FROZEN)) cmci_reenable(); for (i = 0; i < banks; i++) @@ -1088,8 +1135,8 @@ static void mce_reenable_cpu(void *h) } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static int __cpuinit +mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; struct timer_list *t = &per_cpu(mce_timer, cpu); @@ -1142,12 +1189,14 @@ static __init int mce_init_banks(void) for (i = 0; i < banks; i++) { struct sysdev_attribute *a = &bank_attrs[i]; - a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); + + a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); if (!a->attr.name) goto nomem; - a->attr.mode = 0644; - a->show = show_bank; - a->store = set_bank; + + a->attr.mode = 0644; + a->show = show_bank; + a->store = set_bank; } return 0; @@ -1156,6 +1205,7 @@ nomem: kfree(bank_attrs[i].attr.name); kfree(bank_attrs); bank_attrs = NULL; + return -ENOMEM; } @@ -1185,6 +1235,7 @@ static __init int mce_init_device(void) register_hotcpu_notifier(&mce_cpu_notifier); misc_register(&mce_log_device); + return err; } -- cgit v1.2.2 From 3b58dfd04bdfa52e717ead8f3c7622610eb7f950 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:21 +0200 Subject: x86, mce: clean up mce_32.c Make the coding style match that of the rest of the x86 arch code. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_32.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index 3552119b091d..05979e7eff12 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c @@ -2,13 +2,12 @@ * mce.c - x86 Machine Check Exception Reporting * (c) 2002 Alan Cox , Dave Jones */ - -#include -#include +#include #include #include +#include +#include #include -#include #include #include @@ -17,18 +16,20 @@ #include "mce.h" int mce_disabled; -int nr_mce_banks; +int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) { - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); + printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", + smp_processor_id()); } /* Call the installed machine check handler for this CPU setup. */ -void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; +void (*machine_check_vector)(struct pt_regs *, long error_code) = + unexpected_machine_check; /* This has to be run for each processor */ void mcheck_init(struct cpuinfo_x86 *c) -- cgit v1.2.2 From c5aaf0e0702513637278ca4e27a156caa9392817 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:18 +0200 Subject: x86, mce: clean up p4.c Make the coding style match that of the rest of the x86 arch code. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/p4.c | 73 ++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index f53bdcbaf382..cb344aba479c 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -2,18 +2,17 @@ * P4 specific Machine Check Exception Reporting */ -#include -#include -#include #include +#include +#include +#include #include +#include #include #include -#include #include - -#include +#include #include "mce.h" @@ -36,6 +35,7 @@ static int mce_num_extended_msrs; #ifdef CONFIG_X86_MCE_P4THERMAL + static void unexpected_thermal_interrupt(struct pt_regs *regs) { printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", @@ -43,7 +43,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs) add_taint(TAINT_MACHINE_CHECK); } -/* P4/Xeon Thermal transition interrupt handler */ +/* P4/Xeon Thermal transition interrupt handler: */ static void intel_thermal_interrupt(struct pt_regs *regs) { __u64 msr_val; @@ -54,8 +54,9 @@ static void intel_thermal_interrupt(struct pt_regs *regs) therm_throt_process(msr_val & 0x1); } -/* Thermal interrupt handler for this CPU setup */ -static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; +/* Thermal interrupt handler for this CPU setup: */ +static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = + unexpected_thermal_interrupt; void smp_thermal_interrupt(struct pt_regs *regs) { @@ -65,67 +66,76 @@ void smp_thermal_interrupt(struct pt_regs *regs) irq_exit(); } -/* P4/Xeon Thermal regulation detect and init */ +/* P4/Xeon Thermal regulation detect and init: */ static void intel_init_thermal(struct cpuinfo_x86 *c) { - u32 l, h; unsigned int cpu = smp_processor_id(); + u32 l, h; - /* Thermal monitoring */ + /* Thermal monitoring: */ if (!cpu_has(c, X86_FEATURE_ACPI)) return; /* -ENODEV */ - /* Clock modulation */ + /* Clock modulation: */ if (!cpu_has(c, X86_FEATURE_ACC)) return; /* -ENODEV */ - /* first check if its enabled already, in which case there might + /* + * First check if its enabled already, in which case there might * be some SMM goo which handles it, so we can't even put a handler - * since it might be delivered via SMI already -zwanem. + * since it might be delivered via SMI already: */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); h = apic_read(APIC_LVTTHMR); if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", - cpu); + printk(KERN_DEBUG + "CPU%d: Thermal monitoring handled by SMI\n", cpu); + return; /* -EBUSY */ } - /* check whether a vector already exists, temporarily masked? */ + /* Check whether a vector already exists, temporarily masked? */ if (h & APIC_VECTOR_MASK) { - printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " - "installed\n", - cpu, (h & APIC_VECTOR_MASK)); + printk(KERN_DEBUG + "CPU%d: Thermal LVT vector (%#x) already installed\n", + cpu, (h & APIC_VECTOR_MASK)); + return; /* -EBUSY */ } - /* The temperature transition interrupt handler setup */ - h = THERMAL_APIC_VECTOR; /* our delivery vector */ - h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ + /* + * The temperature transition interrupt handler setup: + */ + + /* Our delivery vector: */ + h = THERMAL_APIC_VECTOR; + + /* We'll mask the thermal vector in the lapic till we're ready: */ + h |= APIC_DM_FIXED | APIC_LVT_MASKED; apic_write(APIC_LVTTHMR, h); rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); - /* ok we're good to go... */ + /* Ok, we're good to go... */ vendor_thermal_interrupt = intel_thermal_interrupt; rdmsr(MSR_IA32_MISC_ENABLE, l, h); wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); + /* Unmask the thermal vector: */ l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); /* enable thermal throttle processing */ atomic_set(&therm_throt_en, 1); - return; } #endif /* CONFIG_X86_MCE_P4THERMAL */ - /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ -static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) +static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) { u32 h; @@ -143,9 +153,9 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) static void intel_machine_check(struct pt_regs *regs, long error_code) { - int recover = 1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; + int recover = 1; int i; rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); @@ -157,7 +167,9 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) if (mce_num_extended_msrs > 0) { struct intel_mce_extended_msrs dbg; + intel_get_extended_msrs(&dbg); + printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", @@ -171,6 +183,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) if (high & (1<<31)) { char misc[20]; char addr[24]; + misc[0] = addr[0] = '\0'; if (high & (1<<29)) recover |= 1; @@ -196,6 +209,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) panic("Unable to continue"); printk(KERN_EMERG "Attempting to continue.\n"); + /* * Do not clear the MSR_IA32_MCi_STATUS if the error is not * recoverable/continuable.This will allow BIOS to look at the MSRs @@ -217,7 +231,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); } - void intel_p4_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; -- cgit v1.2.2 From ed8bc7ed9a2ad875617b24d2ba09e49ee886638c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:21 +0200 Subject: x86, mce: clean up p5.c Make the coding style match that of the rest of the x86 arch code. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/p5.c | 43 +++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index c9f77ea69edc..8812f5441830 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -2,11 +2,10 @@ * P5 specific Machine Check Exception Reporting * (C) Copyright 2002 Alan Cox */ - -#include -#include -#include #include +#include +#include +#include #include #include @@ -15,39 +14,53 @@ #include "mce.h" -/* Machine check handler for Pentium class Intel */ +/* Machine check handler for Pentium class Intel CPUs: */ static void pentium_machine_check(struct pt_regs *regs, long error_code) { u32 loaddr, hi, lotype; + rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); - printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); - if (lotype&(1<<5)) - printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); + + printk(KERN_EMERG + "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", + smp_processor_id(), loaddr, lotype); + + if (lotype & (1<<5)) { + printk(KERN_EMERG + "CPU#%d: Possible thermal failure (CPU on fire ?).\n", + smp_processor_id()); + } + add_taint(TAINT_MACHINE_CHECK); } -/* Set up machine check reporting for processors with Intel style MCE */ +/* Set up machine check reporting for processors with Intel style MCE: */ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; - /*Check for MCE support */ + /* Check for MCE support: */ if (!cpu_has(c, X86_FEATURE_MCE)) return; - /* Default P5 to off as its often misconnected */ + /* Default P5 to off as its often misconnected: */ if (mce_disabled != -1) return; + machine_check_vector = pentium_machine_check; + /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); - /* Read registers before enabling */ + /* Read registers before enabling: */ rdmsr(MSR_IA32_P5_MC_ADDR, l, h); rdmsr(MSR_IA32_P5_MC_TYPE, l, h); - printk(KERN_INFO "Intel old style machine check architecture supported.\n"); + printk(KERN_INFO + "Intel old style machine check architecture supported.\n"); - /* Enable MCE */ + /* Enable MCE: */ set_in_cr4(X86_CR4_MCE); - printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); + printk(KERN_INFO + "Intel old style machine check reporting enabled on CPU#%d.\n", + smp_processor_id()); } -- cgit v1.2.2 From ea2566ff80e096eeecc0918fb5d5a4612d8f62ef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:19 +0200 Subject: x86, mce: clean up p6.c Make the coding style match that of the rest of the x86 arch code. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/p6.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 2ac52d7b434b..43c24e667457 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c @@ -2,11 +2,10 @@ * P6 specific Machine Check Exception Reporting * (C) Copyright 2002 Alan Cox */ - -#include -#include -#include #include +#include +#include +#include #include #include @@ -18,9 +17,9 @@ /* Machine Check Handler For PII/PIII */ static void intel_machine_check(struct pt_regs *regs, long error_code) { - int recover = 1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; + int recover = 1; int i; rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); @@ -35,12 +34,16 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) if (high & (1<<31)) { char misc[20]; char addr[24]; - misc[0] = addr[0] = '\0'; + + misc[0] = '\0'; + addr[0] = '\0'; + if (high & (1<<29)) recover |= 1; if (high & (1<<25)) recover |= 2; high &= ~(1<<31); + if (high & (1<<27)) { rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); snprintf(misc, 20, "[%08x%08x]", ahigh, alow); @@ -49,6 +52,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); snprintf(addr, 24, " at %08x%08x", ahigh, alow); } + printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", smp_processor_id(), i, high, low, misc, addr); } @@ -63,16 +67,17 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) /* * Do not clear the MSR_IA32_MCi_STATUS if the error is not * recoverable/continuable.This will allow BIOS to look at the MSRs - * for errors if the OS could not log the error. + * for errors if the OS could not log the error: */ for (i = 0; i < nr_mce_banks; i++) { unsigned int msr; + msr = MSR_IA32_MC0_STATUS+i*4; rdmsr(msr, low, high); if (high & (1<<31)) { - /* Clear it */ + /* Clear it: */ wrmsr(msr, 0UL, 0UL); - /* Serialize */ + /* Serialize: */ wmb(); add_taint(TAINT_MACHINE_CHECK); } @@ -81,7 +86,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); } -/* Set up machine check reporting for processors with Intel style MCE */ +/* Set up machine check reporting for processors with Intel style MCE: */ void intel_p6_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; @@ -97,6 +102,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c) /* Ok machine check is available */ machine_check_vector = intel_machine_check; + /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); printk(KERN_INFO "Intel machine check architecture supported.\n"); -- cgit v1.2.2 From efee4ca80980f97b60e91e3322c3342f19623eff Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:20 +0200 Subject: x86, mce: clean up k7.c Make the coding style match that of the rest of the x86 arch code. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/k7.c | 42 ++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index dd3af6e7b39a..89e510424152 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -2,11 +2,10 @@ * Athlon specific Machine Check Exception Reporting * (C) Copyright 2002 Dave Jones */ - -#include -#include -#include #include +#include +#include +#include #include #include @@ -15,12 +14,12 @@ #include "mce.h" -/* Machine Check Handler For AMD Athlon/Duron */ +/* Machine Check Handler For AMD Athlon/Duron: */ static void k7_machine_check(struct pt_regs *regs, long error_code) { - int recover = 1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; + int recover = 1; int i; rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); @@ -32,15 +31,19 @@ static void k7_machine_check(struct pt_regs *regs, long error_code) for (i = 1; i < nr_mce_banks; i++) { rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); - if (high&(1<<31)) { + if (high & (1<<31)) { char misc[20]; char addr[24]; - misc[0] = addr[0] = '\0'; + + misc[0] = '\0'; + addr[0] = '\0'; + if (high & (1<<29)) recover |= 1; if (high & (1<<25)) recover |= 2; high &= ~(1<<31); + if (high & (1<<27)) { rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); snprintf(misc, 20, "[%08x%08x]", ahigh, alow); @@ -49,27 +52,31 @@ static void k7_machine_check(struct pt_regs *regs, long error_code) rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); snprintf(addr, 24, " at %08x%08x", ahigh, alow); } + printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", smp_processor_id(), i, high, low, misc, addr); - /* Clear it */ + + /* Clear it: */ wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); - /* Serialize */ + /* Serialize: */ wmb(); add_taint(TAINT_MACHINE_CHECK); } } - if (recover&2) + if (recover & 2) panic("CPU context corrupt"); - if (recover&1) + if (recover & 1) panic("Unable to continue"); + printk(KERN_EMERG "Attempting to continue.\n"); + mcgstl &= ~(1<<2); wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); } -/* AMD K7 machine check is Intel like */ +/* AMD K7 machine check is Intel like: */ void amd_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; @@ -79,21 +86,26 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) return; machine_check_vector = k7_machine_check; + /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); printk(KERN_INFO "Intel machine check architecture supported.\n"); + rdmsr(MSR_IA32_MCG_CAP, l, h); if (l & (1<<8)) /* Control register present ? */ wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; - /* Clear status for MC index 0 separately, we don't touch CTL, - * as some K7 Athlons cause spurious MCEs when its enabled. */ + /* + * Clear status for MC index 0 separately, we don't touch CTL, + * as some K7 Athlons cause spurious MCEs when its enabled: + */ if (boot_cpu_data.x86 == 6) { wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); i = 1; } else i = 0; + for (; i < nr_mce_banks; i++) { wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); -- cgit v1.2.2 From 91425084f74c0ad087b3fb6bdad79a825f952720 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:22 +0200 Subject: x86, mce: clean up winchip.c Make the coding style match that of the rest of the x86 arch code. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/winchip.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 2a043d89811d..81b02487090b 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -2,11 +2,10 @@ * IDT Winchip specific Machine Check Exception Reporting * (C) Copyright 2002 Alan Cox */ - -#include -#include -#include #include +#include +#include +#include #include #include @@ -14,7 +13,7 @@ #include "mce.h" -/* Machine check handler for WinChip C6 */ +/* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); @@ -25,12 +24,18 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code) void winchip_mcheck_init(struct cpuinfo_x86 *c) { u32 lo, hi; + machine_check_vector = winchip_machine_check; + /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); + rdmsr(MSR_IDT_FCR1, lo, hi); lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */ lo &= ~(1<<4); /* Enable MCE */ wrmsr(MSR_IDT_FCR1, lo, hi); + set_in_cr4(X86_CR4_MCE); - printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); + + printk(KERN_INFO + "Winchip machine check reporting enabled on CPU#0.\n"); } -- cgit v1.2.2 From bdbfbdd5e8f0efb9bfef2e597f8ac673c36317ab Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:20 +0200 Subject: x86, mce: clean up non-fatal.c Make the coding style match that of the rest of the x86 arch code. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/non-fatal.c | 57 +++++++++++++++++----------------- 1 file changed, 29 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index a74af128efc9..70b710420f74 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -6,15 +6,14 @@ * This file contains routines to check for non-fatal MCEs every 15s * */ - -#include -#include -#include -#include -#include #include -#include +#include +#include +#include #include +#include +#include +#include #include #include @@ -22,9 +21,9 @@ #include "mce.h" -static int firstbank; +static int firstbank; -#define MCE_RATE 15*HZ /* timer rate is 15s */ +#define MCE_RATE (15*HZ) /* timer rate is 15s */ static void mce_checkregs(void *info) { @@ -34,23 +33,24 @@ static void mce_checkregs(void *info) for (i = firstbank; i < nr_mce_banks; i++) { rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); - if (high & (1<<31)) { - printk(KERN_INFO "MCE: The hardware reports a non " - "fatal, correctable incident occurred on " - "CPU %d.\n", + if (!(high & (1<<31))) + continue; + + printk(KERN_INFO "MCE: The hardware reports a non fatal, " + "correctable incident occurred on CPU %d.\n", smp_processor_id()); - printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); - - /* - * Scrub the error so we don't pick it up in MCE_RATE - * seconds time. - */ - wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); - - /* Serialize */ - wmb(); - add_taint(TAINT_MACHINE_CHECK); - } + + printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); + + /* + * Scrub the error so we don't pick it up in MCE_RATE + * seconds time: + */ + wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); + + /* Serialize: */ + wmb(); + add_taint(TAINT_MACHINE_CHECK); } } @@ -77,16 +77,17 @@ static int __init init_nonfatal_mce_checker(void) /* Some Athlons misbehave when we frob bank 0 */ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 == 6) - firstbank = 1; + boot_cpu_data.x86 == 6) + firstbank = 1; else - firstbank = 0; + firstbank = 0; /* * Check for non-fatal errors every MCE_RATE s */ schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); printk(KERN_INFO "Machine check exception polling timer started.\n"); + return 0; } module_init(init_nonfatal_mce_checker); -- cgit v1.2.2 From cb6f3c155b0afabc48667efb9e7b1ce92ccfcab4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:19 +0200 Subject: x86, mce: clean up therm_throt.c Make the coding style match that of the rest of the x86 arch code. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 74 +++++++++++++++++--------------- 1 file changed, 39 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index d5ae2243f0b9..a2b5d7ddb19f 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -1,7 +1,7 @@ /* - * * Thermal throttle event support code (such as syslog messaging and rate * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). + * * This allows consistent reporting of CPU thermal throttle events. * * Maintains a counter in /sys that keeps track of the number of thermal @@ -13,43 +13,44 @@ * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. * Inspired by Ross Biro's and Al Borchers' counter code. */ - +#include +#include #include #include #include -#include -#include -#include + #include +#include /* How long to wait between reporting thermal events */ -#define CHECK_INTERVAL (300 * HZ) +#define CHECK_INTERVAL (300 * HZ) static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); -atomic_t therm_throt_en = ATOMIC_INIT(0); + +atomic_t therm_throt_en = ATOMIC_INIT(0); #ifdef CONFIG_SYSFS -#define define_therm_throt_sysdev_one_ro(_name) \ - static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) - -#define define_therm_throt_sysdev_show_func(name) \ -static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ - char *buf) \ -{ \ - unsigned int cpu = dev->id; \ - ssize_t ret; \ - \ - preempt_disable(); /* CPU hotplug */ \ - if (cpu_online(cpu)) \ - ret = sprintf(buf, "%lu\n", \ - per_cpu(thermal_throttle_##name, cpu)); \ - else \ - ret = 0; \ - preempt_enable(); \ - \ - return ret; \ +#define define_therm_throt_sysdev_one_ro(_name) \ + static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) + +#define define_therm_throt_sysdev_show_func(name) \ +static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ +{ \ + unsigned int cpu = dev->id; \ + ssize_t ret; \ + \ + preempt_disable(); /* CPU hotplug */ \ + if (cpu_online(cpu)) \ + ret = sprintf(buf, "%lu\n", \ + per_cpu(thermal_throttle_##name, cpu)); \ + else \ + ret = 0; \ + preempt_enable(); \ + \ + return ret; \ } define_therm_throt_sysdev_show_func(count); @@ -61,8 +62,8 @@ static struct attribute *thermal_throttle_attrs[] = { }; static struct attribute_group thermal_throttle_attr_group = { - .attrs = thermal_throttle_attrs, - .name = "thermal_throttle" + .attrs = thermal_throttle_attrs, + .name = "thermal_throttle" }; #endif /* CONFIG_SYSFS */ @@ -110,10 +111,11 @@ int therm_throt_process(int curr) } #ifdef CONFIG_SYSFS -/* Add/Remove thermal_throttle interface for CPU device */ +/* Add/Remove thermal_throttle interface for CPU device: */ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) { - return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); + return sysfs_create_group(&sys_dev->kobj, + &thermal_throttle_attr_group); } static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) @@ -121,19 +123,21 @@ static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); } -/* Mutex protecting device creation against CPU hotplug */ +/* Mutex protecting device creation against CPU hotplug: */ static DEFINE_MUTEX(therm_cpu_lock); /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static __cpuinit int +thermal_throttle_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned long)hcpu; struct sys_device *sys_dev; int err = 0; sys_dev = get_cpu_sysdev(cpu); + switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: -- cgit v1.2.2 From 1cb2a8e1767ab60370ecce90654c0f281c602d95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:18 +0200 Subject: x86, mce: clean up mce_amd_64.c Make the coding style match that of the rest of the x86 arch code. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 188 +++++++++++++++++--------------- 1 file changed, 103 insertions(+), 85 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 56dde9c4bc96..4d90ec3eb51d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -13,22 +13,22 @@ * * All MC4_MISCi registers are shared between multi-cores */ - -#include -#include -#include #include -#include #include -#include -#include +#include #include +#include +#include #include +#include +#include +#include + +#include #include +#include #include #include -#include -#include #define PFX "mce_threshold: " #define VERSION "version 1.1.1" @@ -48,26 +48,26 @@ #define MCG_XBLK_ADDR 0xC0000400 struct threshold_block { - unsigned int block; - unsigned int bank; - unsigned int cpu; - u32 address; - u16 interrupt_enable; - u16 threshold_limit; - struct kobject kobj; - struct list_head miscj; + unsigned int block; + unsigned int bank; + unsigned int cpu; + u32 address; + u16 interrupt_enable; + u16 threshold_limit; + struct kobject kobj; + struct list_head miscj; }; /* defaults used early on boot */ static struct threshold_block threshold_defaults = { - .interrupt_enable = 0, - .threshold_limit = THRESHOLD_MAX, + .interrupt_enable = 0, + .threshold_limit = THRESHOLD_MAX, }; struct threshold_bank { - struct kobject *kobj; - struct threshold_block *blocks; - cpumask_var_t cpus; + struct kobject *kobj; + struct threshold_block *blocks; + cpumask_var_t cpus; }; static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); @@ -86,9 +86,9 @@ static void amd_threshold_interrupt(void); */ struct thresh_restart { - struct threshold_block *b; - int reset; - u16 old_limit; + struct threshold_block *b; + int reset; + u16 old_limit; }; /* must be called with correct cpu affinity */ @@ -110,6 +110,7 @@ static void threshold_restart_bank(void *_tr) } else if (tr->old_limit) { /* change limit w/o reset */ int new_count = (mci_misc_hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); + mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | (new_count & THRESHOLD_MAX); } @@ -125,11 +126,11 @@ static void threshold_restart_bank(void *_tr) /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { - unsigned int bank, block; unsigned int cpu = smp_processor_id(); - u8 lvt_off; u32 low = 0, high = 0, address = 0; + unsigned int bank, block; struct thresh_restart tr; + u8 lvt_off; for (bank = 0; bank < NR_BANKS; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -140,8 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) if (!address) break; address += MCG_XBLK_ADDR; - } - else + } else ++address; if (rdmsr_safe(address, &low, &high)) @@ -193,9 +193,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) */ static void amd_threshold_interrupt(void) { + u32 low = 0, high = 0, address = 0; unsigned int bank, block; struct mce m; - u32 low = 0, high = 0, address = 0; mce_setup(&m); @@ -204,16 +204,16 @@ static void amd_threshold_interrupt(void) if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) continue; for (block = 0; block < NR_BLOCKS; ++block) { - if (block == 0) + if (block == 0) { address = MSR_IA32_MC0_MISC + bank * 4; - else if (block == 1) { + } else if (block == 1) { address = (low & MASK_BLKPTR_LO) >> 21; if (!address) break; address += MCG_XBLK_ADDR; - } - else + } else { ++address; + } if (rdmsr_safe(address, &low, &high)) break; @@ -229,8 +229,10 @@ static void amd_threshold_interrupt(void) (high & MASK_LOCKED_HI)) continue; - /* Log the machine check that caused the threshold - event. */ + /* + * Log the machine check that caused the threshold + * event. + */ machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_poll_banks)); @@ -254,48 +256,56 @@ static void amd_threshold_interrupt(void) struct threshold_attr { struct attribute attr; - ssize_t(*show) (struct threshold_block *, char *); - ssize_t(*store) (struct threshold_block *, const char *, size_t count); + ssize_t (*show) (struct threshold_block *, char *); + ssize_t (*store) (struct threshold_block *, const char *, size_t count); }; -#define SHOW_FIELDS(name) \ -static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ -{ \ - return sprintf(buf, "%lx\n", (unsigned long) b->name); \ +#define SHOW_FIELDS(name) \ +static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ +{ \ + return sprintf(buf, "%lx\n", (unsigned long) b->name); \ } SHOW_FIELDS(interrupt_enable) SHOW_FIELDS(threshold_limit) -static ssize_t store_interrupt_enable(struct threshold_block *b, - const char *buf, size_t count) +static ssize_t +store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count) { - char *end; struct thresh_restart tr; - unsigned long new = simple_strtoul(buf, &end, 0); + unsigned long new; + char *end; + + new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; + b->interrupt_enable = !!new; - tr.b = b; - tr.reset = 0; - tr.old_limit = 0; + tr.b = b; + tr.reset = 0; + tr.old_limit = 0; + smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); return end - buf; } -static ssize_t store_threshold_limit(struct threshold_block *b, - const char *buf, size_t count) +static ssize_t +store_threshold_limit(struct threshold_block *b, const char *buf, size_t count) { - char *end; struct thresh_restart tr; - unsigned long new = simple_strtoul(buf, &end, 0); + unsigned long new; + char *end; + + new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; + if (new > THRESHOLD_MAX) new = THRESHOLD_MAX; if (new < 1) new = 1; + tr.old_limit = b->threshold_limit; b->threshold_limit = new; tr.b = b; @@ -307,8 +317,8 @@ static ssize_t store_threshold_limit(struct threshold_block *b, } struct threshold_block_cross_cpu { - struct threshold_block *tb; - long retval; + struct threshold_block *tb; + long retval; }; static void local_error_count_handler(void *_tbcc) @@ -338,15 +348,16 @@ static ssize_t store_error_count(struct threshold_block *b, return 1; } -#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ +#define THRESHOLD_ATTR(_name, _mode, _show, _store) \ +{ \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ }; -#define RW_ATTR(name) \ -static struct threshold_attr name = \ - THRESHOLD_ATTR(name, 0644, show_## name, store_## name) +#define RW_ATTR(name) \ +static struct threshold_attr name = \ + THRESHOLD_ATTR(name, 0644, show_## name, store_## name) RW_ATTR(interrupt_enable); RW_ATTR(threshold_limit); @@ -359,15 +370,17 @@ static struct attribute *default_attrs[] = { NULL }; -#define to_block(k) container_of(k, struct threshold_block, kobj) -#define to_attr(a) container_of(a, struct threshold_attr, attr) +#define to_block(k) container_of(k, struct threshold_block, kobj) +#define to_attr(a) container_of(a, struct threshold_attr, attr) static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) { struct threshold_block *b = to_block(kobj); struct threshold_attr *a = to_attr(attr); ssize_t ret; + ret = a->show ? a->show(b, buf) : -EIO; + return ret; } @@ -377,18 +390,20 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, struct threshold_block *b = to_block(kobj); struct threshold_attr *a = to_attr(attr); ssize_t ret; + ret = a->store ? a->store(b, buf, count) : -EIO; + return ret; } static struct sysfs_ops threshold_ops = { - .show = show, - .store = store, + .show = show, + .store = store, }; static struct kobj_type threshold_ktype = { - .sysfs_ops = &threshold_ops, - .default_attrs = default_attrs, + .sysfs_ops = &threshold_ops, + .default_attrs = default_attrs, }; static __cpuinit int allocate_threshold_blocks(unsigned int cpu, @@ -396,9 +411,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, unsigned int block, u32 address) { - int err; - u32 low, high; struct threshold_block *b = NULL; + u32 low, high; + int err; if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) return 0; @@ -421,20 +436,21 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, if (!b) return -ENOMEM; - b->block = block; - b->bank = bank; - b->cpu = cpu; - b->address = address; - b->interrupt_enable = 0; - b->threshold_limit = THRESHOLD_MAX; + b->block = block; + b->bank = bank; + b->cpu = cpu; + b->address = address; + b->interrupt_enable = 0; + b->threshold_limit = THRESHOLD_MAX; INIT_LIST_HEAD(&b->miscj); - if (per_cpu(threshold_banks, cpu)[bank]->blocks) + if (per_cpu(threshold_banks, cpu)[bank]->blocks) { list_add(&b->miscj, &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); - else + } else { per_cpu(threshold_banks, cpu)[bank]->blocks = b; + } err = kobject_init_and_add(&b->kobj, &threshold_ktype, per_cpu(threshold_banks, cpu)[bank]->kobj, @@ -447,8 +463,9 @@ recurse: if (!address) return 0; address += MCG_XBLK_ADDR; - } else + } else { ++address; + } err = allocate_threshold_blocks(cpu, bank, ++block, address); if (err) @@ -507,6 +524,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) cpumask_copy(b->cpus, cpu_core_mask(cpu)); per_cpu(threshold_banks, cpu)[bank] = b; + goto out; } #endif @@ -605,15 +623,13 @@ static void deallocate_threshold_block(unsigned int cpu, static void threshold_remove_bank(unsigned int cpu, int bank) { - int i = 0; struct threshold_bank *b; char name[32]; + int i = 0; b = per_cpu(threshold_banks, cpu)[bank]; - if (!b) return; - if (!b->blocks) goto free_out; @@ -624,6 +640,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) if (shared_bank[bank] && b->blocks->cpu != cpu) { sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); per_cpu(threshold_banks, cpu)[bank] = NULL; + return; } #endif @@ -659,8 +676,8 @@ static void threshold_remove_device(unsigned int cpu) } /* get notified when a cpu comes on/off */ -static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, - unsigned int cpu) +static void __cpuinit +amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu) { if (cpu >= NR_CPUS) return; @@ -686,11 +703,12 @@ static __init int threshold_init_device(void) /* to hit CPUs online before the notifier is up */ for_each_online_cpu(lcpu) { int err = threshold_create_device(lcpu); + if (err) return err; } threshold_cpu_callback = amd_64_threshold_cpu_callback; + return 0; } - device_initcall(threshold_init_device); -- cgit v1.2.2 From 6cc6f3ebd19fea722c19630af5ad68af7f51d493 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Apr 2009 12:31:23 +0200 Subject: x86, mce: unify Intel thermal init, prepare Prepare for unification, make two intel_init_thermal equal. [ Impact: cleanup ] Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 35 +++++++++++++----------- arch/x86/kernel/cpu/mcheck/p4.c | 44 ++++++++++++++----------------- 2 files changed, 40 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index cef3ee30744b..b85d0c107c84 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -34,21 +34,22 @@ asmlinkage void smp_thermal_interrupt(void) irq_exit(); } +static inline void intel_set_thermal_handler(void) { } + static void intel_init_thermal(struct cpuinfo_x86 *c) { - u32 l, h; - int tm2 = 0; unsigned int cpu = smp_processor_id(); + int tm2 = 0; + u32 l, h; - if (!cpu_has(c, X86_FEATURE_ACPI)) - return; - - if (!cpu_has(c, X86_FEATURE_ACC)) + /* Thermal monitoring depends on ACPI and clock modulation*/ + if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) return; - /* first check if TM1 is already enabled by the BIOS, in which - * case there might be some SMM goo which handles it, so we can't even - * put a handler since it might be delivered via SMI already. + /* + * First check if its enabled already, in which case there might + * be some SMM goo which handles it, so we can't even put a handler + * since it might be delivered via SMI already: */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); h = apic_read(APIC_LVTTHMR); @@ -61,31 +62,35 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) tm2 = 1; + /* Check whether a vector already exists */ if (h & APIC_VECTOR_MASK) { printk(KERN_DEBUG - "CPU%d: Thermal LVT vector (%#x) already " - "installed\n", cpu, (h & APIC_VECTOR_MASK)); + "CPU%d: Thermal LVT vector (%#x) already installed\n", + cpu, (h & APIC_VECTOR_MASK)); return; } - h = THERMAL_APIC_VECTOR; - h |= (APIC_DM_FIXED | APIC_LVT_MASKED); + /* We'll mask the thermal vector in the lapic till we're ready: */ + h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; apic_write(APIC_LVTTHMR, h); rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); + intel_set_thermal_handler(); + rdmsr(MSR_IA32_MISC_ENABLE, l, h); wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); + /* Unmask the thermal vector: */ l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", - cpu, tm2 ? "TM2" : "TM1"); + cpu, tm2 ? "TM2" : "TM1"); /* enable thermal throttle processing */ atomic_set(&therm_throt_en, 1); - return; } /* diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index cb344aba479c..f70753a443bb 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -66,19 +66,21 @@ void smp_thermal_interrupt(struct pt_regs *regs) irq_exit(); } +static void intel_set_thermal_handler(void) +{ + vendor_thermal_interrupt = intel_thermal_interrupt; +} + /* P4/Xeon Thermal regulation detect and init: */ static void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); + int tm2 = 0; u32 l, h; - /* Thermal monitoring: */ - if (!cpu_has(c, X86_FEATURE_ACPI)) - return; /* -ENODEV */ - - /* Clock modulation: */ - if (!cpu_has(c, X86_FEATURE_ACC)) - return; /* -ENODEV */ + /* Thermal monitoring depends on ACPI and clock modulation*/ + if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) + return; /* * First check if its enabled already, in which case there might @@ -90,35 +92,28 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); - - return; /* -EBUSY */ + return; } - /* Check whether a vector already exists, temporarily masked? */ + if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) + tm2 = 1; + + /* Check whether a vector already exists */ if (h & APIC_VECTOR_MASK) { printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n", cpu, (h & APIC_VECTOR_MASK)); - - return; /* -EBUSY */ + return; } - /* - * The temperature transition interrupt handler setup: - */ - - /* Our delivery vector: */ - h = THERMAL_APIC_VECTOR; - /* We'll mask the thermal vector in the lapic till we're ready: */ - h |= APIC_DM_FIXED | APIC_LVT_MASKED; + h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; apic_write(APIC_LVTTHMR, h); rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); + wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); - /* Ok, we're good to go... */ - vendor_thermal_interrupt = intel_thermal_interrupt; + intel_set_thermal_handler(); rdmsr(MSR_IA32_MISC_ENABLE, l, h); wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); @@ -127,7 +122,8 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); + printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", + cpu, tm2 ? "TM2" : "TM1"); /* enable thermal throttle processing */ atomic_set(&therm_throt_en, 1); -- cgit v1.2.2 From a65d086235208a3b3546e209d2210048549099b2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Apr 2009 12:31:23 +0200 Subject: x86, mce: unify Intel thermal init Mechanic unification. No change in code. [ Impact: cleanup, 32-bit / 64-bit unification ] Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/Makefile | 3 +- arch/x86/kernel/cpu/mcheck/mce.h | 11 +++++ arch/x86/kernel/cpu/mcheck/mce_intel.c | 73 +++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 61 +------------------------- arch/x86/kernel/cpu/mcheck/p4.c | 59 +------------------------ 5 files changed, 89 insertions(+), 118 deletions(-) create mode 100644 arch/x86/kernel/cpu/mcheck/mce_intel.c (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index b2f89829bbe8..6def76942bf2 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,7 +1,8 @@ obj-y = mce_$(BITS).o therm_throt.o obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o -obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o +obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o +obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h index ae9f628838f1..2d1a54bdadfc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ b/arch/x86/kernel/cpu/mcheck/mce.h @@ -1,6 +1,8 @@ #include #include +#ifdef CONFIG_X86_32 + void amd_mcheck_init(struct cpuinfo_x86 *c); void intel_p4_mcheck_init(struct cpuinfo_x86 *c); void intel_p5_mcheck_init(struct cpuinfo_x86 *c); @@ -12,3 +14,12 @@ extern void (*machine_check_vector)(struct pt_regs *, long error_code); extern int nr_mce_banks; +void intel_set_thermal_handler(void); + +#else + +static inline void intel_set_thermal_handler(void) { } + +#endif + +void intel_init_thermal(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c new file mode 100644 index 000000000000..bad3cbb0e566 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -0,0 +1,73 @@ +/* + * Common code for Intel machine checks + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "mce.h" + +void intel_init_thermal(struct cpuinfo_x86 *c) +{ + unsigned int cpu = smp_processor_id(); + int tm2 = 0; + u32 l, h; + + /* Thermal monitoring depends on ACPI and clock modulation*/ + if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) + return; + + /* + * First check if its enabled already, in which case there might + * be some SMM goo which handles it, so we can't even put a handler + * since it might be delivered via SMI already: + */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); + if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG + "CPU%d: Thermal monitoring handled by SMI\n", cpu); + return; + } + + if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) + tm2 = 1; + + /* Check whether a vector already exists */ + if (h & APIC_VECTOR_MASK) { + printk(KERN_DEBUG + "CPU%d: Thermal LVT vector (%#x) already installed\n", + cpu, (h & APIC_VECTOR_MASK)); + return; + } + + /* We'll mask the thermal vector in the lapic till we're ready: */ + h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; + apic_write(APIC_LVTTHMR, h); + + rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); + + intel_set_thermal_handler(); + + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); + + /* Unmask the thermal vector: */ + l = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + + printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", + cpu, tm2 ? "TM2" : "TM1"); + + /* enable thermal throttle processing */ + atomic_set(&therm_throt_en, 1); +} diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index b85d0c107c84..38f9632306fa 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -17,6 +17,8 @@ #include #include +#include "mce.h" + asmlinkage void smp_thermal_interrupt(void) { __u64 msr_val; @@ -34,65 +36,6 @@ asmlinkage void smp_thermal_interrupt(void) irq_exit(); } -static inline void intel_set_thermal_handler(void) { } - -static void intel_init_thermal(struct cpuinfo_x86 *c) -{ - unsigned int cpu = smp_processor_id(); - int tm2 = 0; - u32 l, h; - - /* Thermal monitoring depends on ACPI and clock modulation*/ - if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) - return; - - /* - * First check if its enabled already, in which case there might - * be some SMM goo which handles it, so we can't even put a handler - * since it might be delivered via SMI already: - */ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); - if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG - "CPU%d: Thermal monitoring handled by SMI\n", cpu); - return; - } - - if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) - tm2 = 1; - - /* Check whether a vector already exists */ - if (h & APIC_VECTOR_MASK) { - printk(KERN_DEBUG - "CPU%d: Thermal LVT vector (%#x) already installed\n", - cpu, (h & APIC_VECTOR_MASK)); - return; - } - - /* We'll mask the thermal vector in the lapic till we're ready: */ - h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; - apic_write(APIC_LVTTHMR, h); - - rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); - - intel_set_thermal_handler(); - - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); - - /* Unmask the thermal vector: */ - l = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - - printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", - cpu, tm2 ? "TM2" : "TM1"); - - /* enable thermal throttle processing */ - atomic_set(&therm_throt_en, 1); -} - /* * Support for Intel Correct Machine Check Interrupts. This allows * the CPU to raise an interrupt when a corrected machine check happened. diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index f70753a443bb..f979ffea330b 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -66,68 +66,11 @@ void smp_thermal_interrupt(struct pt_regs *regs) irq_exit(); } -static void intel_set_thermal_handler(void) +void intel_set_thermal_handler(void) { vendor_thermal_interrupt = intel_thermal_interrupt; } -/* P4/Xeon Thermal regulation detect and init: */ -static void intel_init_thermal(struct cpuinfo_x86 *c) -{ - unsigned int cpu = smp_processor_id(); - int tm2 = 0; - u32 l, h; - - /* Thermal monitoring depends on ACPI and clock modulation*/ - if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) - return; - - /* - * First check if its enabled already, in which case there might - * be some SMM goo which handles it, so we can't even put a handler - * since it might be delivered via SMI already: - */ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); - if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG - "CPU%d: Thermal monitoring handled by SMI\n", cpu); - return; - } - - if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) - tm2 = 1; - - /* Check whether a vector already exists */ - if (h & APIC_VECTOR_MASK) { - printk(KERN_DEBUG - "CPU%d: Thermal LVT vector (%#x) already installed\n", - cpu, (h & APIC_VECTOR_MASK)); - return; - } - - /* We'll mask the thermal vector in the lapic till we're ready: */ - h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; - apic_write(APIC_LVTTHMR, h); - - rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); - - intel_set_thermal_handler(); - - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); - - /* Unmask the thermal vector: */ - l = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - - printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", - cpu, tm2 ? "TM2" : "TM1"); - - /* enable thermal throttle processing */ - atomic_set(&therm_throt_en, 1); -} #endif /* CONFIG_X86_MCE_P4THERMAL */ /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ -- cgit v1.2.2 From 06b851d98266b812b2fa23d007cdf53f41194bbb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:25 +0200 Subject: x86, mce: unify, prepare 64bit in mce.h Prepare mce.h for unification, so that it will build on 32-bit x86 kernels too. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4f8c199584e7..8488210b866f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_MCE_H #define _ASM_X86_MCE_H -#ifdef __x86_64__ - #include #include @@ -10,21 +8,21 @@ * Machine Check support for x86 */ -#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ +#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ #define MCG_EXT_P (1ULL<<9) /* Extended registers available */ #define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ -#define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */ -#define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */ -#define MCG_STATUS_MCIP (1UL<<2) /* machine check in progress */ +#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ +#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ +#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ -#define MCI_STATUS_VAL (1UL<<63) /* valid error */ -#define MCI_STATUS_OVER (1UL<<62) /* previous errors lost */ -#define MCI_STATUS_UC (1UL<<61) /* uncorrected error */ -#define MCI_STATUS_EN (1UL<<60) /* error enabled */ -#define MCI_STATUS_MISCV (1UL<<59) /* misc error reg. valid */ -#define MCI_STATUS_ADDRV (1UL<<58) /* addr reg. valid */ -#define MCI_STATUS_PCC (1UL<<57) /* processor context corrupt */ +#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ +#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ +#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ +#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ +#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ +#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ /* Fields are zero when not available */ struct mce { @@ -82,13 +80,11 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) -#endif /* __x86_64__ */ - #ifdef __KERNEL__ #ifdef CONFIG_X86_32 extern int mce_disabled; -#else /* CONFIG_X86_32 */ +#endif #include @@ -143,8 +139,6 @@ extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); extern int mce_notify_user(void); -#endif /* !CONFIG_X86_32 */ - #ifdef CONFIG_X86_MCE extern void mcheck_init(struct cpuinfo_x86 *c); #else -- cgit v1.2.2 From a988d334ae8213c0e0e62327222f6e5e6e52bcf1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:25 +0200 Subject: x86, mce: unify, prepare codes Move current 32-bit mce_32.c code into mce_64.c. [ Remove unused artifact stop/restart_mce pointed by Andi Kleen ] Signed-off-by: Ingo Molnar Cc: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_64.c | 65 +++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 1491246c4d69..ce48ae75e1dc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -1240,3 +1240,68 @@ static __init int mce_init_device(void) } device_initcall(mce_init_device); + +#ifdef CONFIG_X86_32 + +int mce_disabled; + +int nr_mce_banks; +EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ + +/* Handle unconfigured int18 (should never happen) */ +static void unexpected_machine_check(struct pt_regs *regs, long error_code) +{ + printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", + smp_processor_id()); +} + +/* Call the installed machine check handler for this CPU setup. */ +void (*machine_check_vector)(struct pt_regs *, long error_code) = + unexpected_machine_check; + +/* This has to be run for each processor */ +void mcheck_init(struct cpuinfo_x86 *c) +{ + if (mce_disabled == 1) + return; + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + amd_mcheck_init(c); + break; + + case X86_VENDOR_INTEL: + if (c->x86 == 5) + intel_p5_mcheck_init(c); + if (c->x86 == 6) + intel_p6_mcheck_init(c); + if (c->x86 == 15) + intel_p4_mcheck_init(c); + break; + + case X86_VENDOR_CENTAUR: + if (c->x86 == 5) + winchip_mcheck_init(c); + break; + + default: + break; + } +} + +static int __init mcheck_disable(char *str) +{ + mce_disabled = 1; + return 1; +} + +static int __init mcheck_enable(char *str) +{ + mce_disabled = -1; + return 1; +} + +__setup("nomce", mcheck_disable); +__setup("mce", mcheck_enable); + +#endif /* CONFIG_X86_32 */ -- cgit v1.2.2 From 711c2e481c9d1113650d09de10f61ee88ab56fda Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:26 +0200 Subject: x86, mce: unify, prepare for 32-bit v2 Prepare the 64-bit mce_64.c code side to be built on 32-bit. [ includes ifdef relocation by Andi Kleen ] Signed-off-by: Ingo Molnar Cc: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.h | 4 ++-- arch/x86/kernel/cpu/mcheck/mce_64.c | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h index 2d1a54bdadfc..cd6cffcc2de0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ b/arch/x86/kernel/cpu/mcheck/mce.h @@ -1,14 +1,14 @@ #include #include -#ifdef CONFIG_X86_32 - void amd_mcheck_init(struct cpuinfo_x86 *c); void intel_p4_mcheck_init(struct cpuinfo_x86 *c); void intel_p5_mcheck_init(struct cpuinfo_x86 *c); void intel_p6_mcheck_init(struct cpuinfo_x86 *c); void winchip_mcheck_init(struct cpuinfo_x86 *c); +#ifdef CONFIG_X86_32 + /* Call the installed machine check handler for this CPU setup. */ extern void (*machine_check_vector)(struct pt_regs *, long error_code); diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index ce48ae75e1dc..2e2c3d2e9586 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -37,6 +37,10 @@ #include #include +#include "mce.h" + +#ifdef CONFIG_X86_64 + #define MISC_MCELOG_MINOR 227 atomic_t mce_entry; @@ -1241,7 +1245,7 @@ static __init int mce_init_device(void) device_initcall(mce_init_device); -#ifdef CONFIG_X86_32 +#else /* CONFIG_X86_32: */ int mce_disabled; -- cgit v1.2.2 From dba3725d44f5dfb5711fd509fca10b5b828c43b7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:26 +0200 Subject: x86, mce: unify move mce_64.c => mce.c and glue it up in the Makefile. Remove mce_32.c Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/Makefile | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 1311 +++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/mcheck/mce_32.c | 77 -- arch/x86/kernel/cpu/mcheck/mce_64.c | 1311 ----------------------------------- 4 files changed, 1312 insertions(+), 1389 deletions(-) create mode 100644 arch/x86/kernel/cpu/mcheck/mce.c delete mode 100644 arch/x86/kernel/cpu/mcheck/mce_32.c delete mode 100644 arch/x86/kernel/cpu/mcheck/mce_64.c (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 6def76942bf2..55f01b39a105 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,4 +1,4 @@ -obj-y = mce_$(BITS).o therm_throt.o +obj-y = mce.o therm_throt.o obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c new file mode 100644 index 000000000000..2e2c3d2e9586 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -0,0 +1,1311 @@ +/* + * Machine check handler. + * + * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. + * Rest from unknown author(s). + * 2004 Andi Kleen. Rewrote most of it. + * Copyright 2008 Intel Corporation + * Author: Andi Kleen + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "mce.h" + +#ifdef CONFIG_X86_64 + +#define MISC_MCELOG_MINOR 227 + +atomic_t mce_entry; + +static int mce_dont_init; + +/* + * Tolerant levels: + * 0: always panic on uncorrected errors, log corrected errors + * 1: panic or SIGBUS on uncorrected errors, log corrected errors + * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors + * 3: never panic or SIGBUS, log all errors (for testing only) + */ +static int tolerant = 1; +static int banks; +static u64 *bank; +static unsigned long notify_user; +static int rip_msr; +static int mce_bootlog = -1; +static atomic_t mce_events; + +static char trigger[128]; +static char *trigger_argv[2] = { trigger, NULL }; + +static DECLARE_WAIT_QUEUE_HEAD(mce_wait); + +/* MCA banks polled by the period polling timer for corrected events */ +DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { + [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL +}; + +/* Do initial initialization of a struct mce */ +void mce_setup(struct mce *m) +{ + memset(m, 0, sizeof(struct mce)); + m->cpu = smp_processor_id(); + rdtscll(m->tsc); +} + +/* + * Lockless MCE logging infrastructure. + * This avoids deadlocks on printk locks without having to break locks. Also + * separate MCEs from kernel messages to avoid bogus bug reports. + */ + +static struct mce_log mcelog = { + MCE_LOG_SIGNATURE, + MCE_LOG_LEN, +}; + +void mce_log(struct mce *mce) +{ + unsigned next, entry; + + atomic_inc(&mce_events); + mce->finished = 0; + wmb(); + for (;;) { + entry = rcu_dereference(mcelog.next); + for (;;) { + /* + * When the buffer fills up discard new entries. + * Assume that the earlier errors are the more + * interesting ones: + */ + if (entry >= MCE_LOG_LEN) { + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); + return; + } + /* Old left over entry. Skip: */ + if (mcelog.entry[entry].finished) { + entry++; + continue; + } + break; + } + smp_rmb(); + next = entry + 1; + if (cmpxchg(&mcelog.next, entry, next) == entry) + break; + } + memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); + wmb(); + mcelog.entry[entry].finished = 1; + wmb(); + + set_bit(0, ¬ify_user); +} + +static void print_mce(struct mce *m) +{ + printk(KERN_EMERG "\n" + KERN_EMERG "HARDWARE ERROR\n" + KERN_EMERG + "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", + m->cpu, m->mcgstatus, m->bank, m->status); + if (m->ip) { + printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", + !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", + m->cs, m->ip); + if (m->cs == __KERNEL_CS) + print_symbol("{%s}", m->ip); + printk("\n"); + } + printk(KERN_EMERG "TSC %llx ", m->tsc); + if (m->addr) + printk("ADDR %llx ", m->addr); + if (m->misc) + printk("MISC %llx ", m->misc); + printk("\n"); + printk(KERN_EMERG "This is not a software problem!\n"); + printk(KERN_EMERG "Run through mcelog --ascii to decode " + "and contact your hardware vendor\n"); +} + +static void mce_panic(char *msg, struct mce *backup, unsigned long start) +{ + int i; + + oops_begin(); + for (i = 0; i < MCE_LOG_LEN; i++) { + unsigned long tsc = mcelog.entry[i].tsc; + + if (time_before(tsc, start)) + continue; + print_mce(&mcelog.entry[i]); + if (backup && mcelog.entry[i].tsc == backup->tsc) + backup = NULL; + } + if (backup) + print_mce(backup); + panic(msg); +} + +int mce_available(struct cpuinfo_x86 *c) +{ + if (mce_dont_init) + return 0; + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); +} + +static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) +{ + if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { + m->ip = regs->ip; + m->cs = regs->cs; + } else { + m->ip = 0; + m->cs = 0; + } + if (rip_msr) { + /* Assume the RIP in the MSR is exact. Is this true? */ + m->mcgstatus |= MCG_STATUS_EIPV; + rdmsrl(rip_msr, m->ip); + m->cs = 0; + } +} + +/* + * Poll for corrected events or events that happened before reset. + * Those are just logged through /dev/mcelog. + * + * This is executed in standard interrupt context. + */ +void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) +{ + struct mce m; + int i; + + mce_setup(&m); + + rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + for (i = 0; i < banks; i++) { + if (!bank[i] || !test_bit(i, *b)) + continue; + + m.misc = 0; + m.addr = 0; + m.bank = i; + m.tsc = 0; + + barrier(); + rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); + if (!(m.status & MCI_STATUS_VAL)) + continue; + + /* + * Uncorrected events are handled by the exception handler + * when it is enabled. But when the exception is disabled log + * everything. + * + * TBD do the same check for MCI_STATUS_EN here? + */ + if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) + continue; + + if (m.status & MCI_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); + if (m.status & MCI_STATUS_ADDRV) + rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); + + if (!(flags & MCP_TIMESTAMP)) + m.tsc = 0; + /* + * Don't get the IP here because it's unlikely to + * have anything to do with the actual error location. + */ + if (!(flags & MCP_DONTLOG)) { + mce_log(&m); + add_taint(TAINT_MACHINE_CHECK); + } + + /* + * Clear state for this bank. + */ + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + } + + /* + * Don't clear MCG_STATUS here because it's only defined for + * exceptions. + */ +} + +/* + * The actual machine check handler. This only handles real + * exceptions when something got corrupted coming in through int 18. + * + * This is executed in NMI context not subject to normal locking rules. This + * implies that most kernel services cannot be safely used. Don't even + * think about putting a printk in there! + */ +void do_machine_check(struct pt_regs *regs, long error_code) +{ + struct mce m, panicm; + int panicm_found = 0; + u64 mcestart = 0; + int i; + /* + * If no_way_out gets set, there is no safe way to recover from this + * MCE. If tolerant is cranked up, we'll try anyway. + */ + int no_way_out = 0; + /* + * If kill_it gets set, there might be a way to recover from this + * error. + */ + int kill_it = 0; + DECLARE_BITMAP(toclear, MAX_NR_BANKS); + + atomic_inc(&mce_entry); + + if (notify_die(DIE_NMI, "machine check", regs, error_code, + 18, SIGKILL) == NOTIFY_STOP) + goto out2; + if (!banks) + goto out2; + + mce_setup(&m); + + rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + + /* if the restart IP is not valid, we're done for */ + if (!(m.mcgstatus & MCG_STATUS_RIPV)) + no_way_out = 1; + + rdtscll(mcestart); + barrier(); + + for (i = 0; i < banks; i++) { + __clear_bit(i, toclear); + if (!bank[i]) + continue; + + m.misc = 0; + m.addr = 0; + m.bank = i; + + rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); + if ((m.status & MCI_STATUS_VAL) == 0) + continue; + + /* + * Non uncorrected errors are handled by machine_check_poll + * Leave them alone. + */ + if ((m.status & MCI_STATUS_UC) == 0) + continue; + + /* + * Set taint even when machine check was not enabled. + */ + add_taint(TAINT_MACHINE_CHECK); + + __set_bit(i, toclear); + + if (m.status & MCI_STATUS_EN) { + /* if PCC was set, there's no way out */ + no_way_out |= !!(m.status & MCI_STATUS_PCC); + /* + * If this error was uncorrectable and there was + * an overflow, we're in trouble. If no overflow, + * we might get away with just killing a task. + */ + if (m.status & MCI_STATUS_UC) { + if (tolerant < 1 || m.status & MCI_STATUS_OVER) + no_way_out = 1; + kill_it = 1; + } + } else { + /* + * Machine check event was not enabled. Clear, but + * ignore. + */ + continue; + } + + if (m.status & MCI_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); + if (m.status & MCI_STATUS_ADDRV) + rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); + + mce_get_rip(&m, regs); + mce_log(&m); + + /* + * Did this bank cause the exception? + * + * Assume that the bank with uncorrectable errors did it, + * and that there is only a single one: + */ + if ((m.status & MCI_STATUS_UC) && + (m.status & MCI_STATUS_EN)) { + panicm = m; + panicm_found = 1; + } + } + + /* + * If we didn't find an uncorrectable error, pick + * the last one (shouldn't happen, just being safe). + */ + if (!panicm_found) + panicm = m; + + /* + * If we have decided that we just CAN'T continue, and the user + * has not set tolerant to an insane level, give up and die. + */ + if (no_way_out && tolerant < 3) + mce_panic("Machine check", &panicm, mcestart); + + /* + * If the error seems to be unrecoverable, something should be + * done. Try to kill as little as possible. If we can kill just + * one task, do that. If the user has set the tolerance very + * high, don't try to do anything at all. + */ + if (kill_it && tolerant < 3) { + int user_space = 0; + + /* + * If the EIPV bit is set, it means the saved IP is the + * instruction which caused the MCE. + */ + if (m.mcgstatus & MCG_STATUS_EIPV) + user_space = panicm.ip && (panicm.cs & 3); + + /* + * If we know that the error was in user space, send a + * SIGBUS. Otherwise, panic if tolerance is low. + * + * force_sig() takes an awful lot of locks and has a slight + * risk of deadlocking. + */ + if (user_space) { + force_sig(SIGBUS, current); + } else if (panic_on_oops || tolerant < 2) { + mce_panic("Uncorrected machine check", + &panicm, mcestart); + } + } + + /* notify userspace ASAP */ + set_thread_flag(TIF_MCE_NOTIFY); + + /* the last thing we do is clear state */ + for (i = 0; i < banks; i++) { + if (test_bit(i, toclear)) + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + } + wrmsrl(MSR_IA32_MCG_STATUS, 0); + out2: + atomic_dec(&mce_entry); +} + +#ifdef CONFIG_X86_MCE_INTEL +/*** + * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog + * @cpu: The CPU on which the event occurred. + * @status: Event status information + * + * This function should be called by the thermal interrupt after the + * event has been processed and the decision was made to log the event + * further. + * + * The status parameter will be saved to the 'status' field of 'struct mce' + * and historically has been the register value of the + * MSR_IA32_THERMAL_STATUS (Intel) msr. + */ +void mce_log_therm_throt_event(__u64 status) +{ + struct mce m; + + mce_setup(&m); + m.bank = MCE_THERMAL_BANK; + m.status = status; + mce_log(&m); +} +#endif /* CONFIG_X86_MCE_INTEL */ + +/* + * Periodic polling timer for "silent" machine check errors. If the + * poller finds an MCE, poll 2x faster. When the poller finds no more + * errors, poll 2x slower (up to check_interval seconds). + */ +static int check_interval = 5 * 60; /* 5 minutes */ + +static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ +static DEFINE_PER_CPU(struct timer_list, mce_timer); + +static void mcheck_timer(unsigned long data) +{ + struct timer_list *t = &per_cpu(mce_timer, data); + int *n; + + WARN_ON(smp_processor_id() != data); + + if (mce_available(¤t_cpu_data)) { + machine_check_poll(MCP_TIMESTAMP, + &__get_cpu_var(mce_poll_banks)); + } + + /* + * Alert userspace if needed. If we logged an MCE, reduce the + * polling interval, otherwise increase the polling interval. + */ + n = &__get_cpu_var(next_interval); + if (mce_notify_user()) { + *n = max(*n/2, HZ/100); + } else { + *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); + } + + t->expires = jiffies + *n; + add_timer(t); +} + +static void mce_do_trigger(struct work_struct *work) +{ + call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); +} + +static DECLARE_WORK(mce_trigger_work, mce_do_trigger); + +/* + * Notify the user(s) about new machine check events. + * Can be called from interrupt context, but not from machine check/NMI + * context. + */ +int mce_notify_user(void) +{ + /* Not more than two messages every minute */ + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); + + clear_thread_flag(TIF_MCE_NOTIFY); + + if (test_and_clear_bit(0, ¬ify_user)) { + wake_up_interruptible(&mce_wait); + + /* + * There is no risk of missing notifications because + * work_pending is always cleared before the function is + * executed. + */ + if (trigger[0] && !work_pending(&mce_trigger_work)) + schedule_work(&mce_trigger_work); + + if (__ratelimit(&ratelimit)) + printk(KERN_INFO "Machine check events logged\n"); + + return 1; + } + return 0; +} + +/* see if the idle task needs to notify userspace: */ +static int +mce_idle_callback(struct notifier_block *nfb, unsigned long action, + void *unused) +{ + /* IDLE_END should be safe - interrupts are back on */ + if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) + mce_notify_user(); + + return NOTIFY_OK; +} + +static struct notifier_block mce_idle_notifier = { + .notifier_call = mce_idle_callback, +}; + +static __init int periodic_mcheck_init(void) +{ + idle_notifier_register(&mce_idle_notifier); + return 0; +} +__initcall(periodic_mcheck_init); + +/* + * Initialize Machine Checks for a CPU. + */ +static int mce_cap_init(void) +{ + unsigned b; + u64 cap; + + rdmsrl(MSR_IA32_MCG_CAP, cap); + b = cap & 0xff; + if (b > MAX_NR_BANKS) { + printk(KERN_WARNING + "MCE: Using only %u machine check banks out of %u\n", + MAX_NR_BANKS, b); + b = MAX_NR_BANKS; + } + + /* Don't support asymmetric configurations today */ + WARN_ON(banks != 0 && b != banks); + banks = b; + if (!bank) { + bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); + if (!bank) + return -ENOMEM; + memset(bank, 0xff, banks * sizeof(u64)); + } + + /* Use accurate RIP reporting if available. */ + if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) + rip_msr = MSR_IA32_MCG_EIP; + + return 0; +} + +static void mce_init(void *dummy) +{ + mce_banks_t all_banks; + u64 cap; + int i; + + /* + * Log the machine checks left over from the previous reset. + */ + bitmap_fill(all_banks, MAX_NR_BANKS); + machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); + + set_in_cr4(X86_CR4_MCE); + + rdmsrl(MSR_IA32_MCG_CAP, cap); + if (cap & MCG_CTL_P) + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + + for (i = 0; i < banks; i++) { + wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + } +} + +/* Add per CPU specific workarounds here */ +static void mce_cpu_quirks(struct cpuinfo_x86 *c) +{ + /* This should be disabled by the BIOS, but isn't always */ + if (c->x86_vendor == X86_VENDOR_AMD) { + if (c->x86 == 15 && banks > 4) { + /* + * disable GART TBL walk error reporting, which + * trips off incorrectly with the IOMMU & 3ware + * & Cerberus: + */ + clear_bit(10, (unsigned long *)&bank[4]); + } + if (c->x86 <= 17 && mce_bootlog < 0) { + /* + * Lots of broken BIOS around that don't clear them + * by default and leave crap in there. Don't log: + */ + mce_bootlog = 0; + } + } + +} + +static void mce_cpu_features(struct cpuinfo_x86 *c) +{ + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + mce_intel_feature_init(c); + break; + case X86_VENDOR_AMD: + mce_amd_feature_init(c); + break; + default: + break; + } +} + +static void mce_init_timer(void) +{ + struct timer_list *t = &__get_cpu_var(mce_timer); + int *n = &__get_cpu_var(next_interval); + + *n = check_interval * HZ; + if (!*n) + return; + setup_timer(t, mcheck_timer, smp_processor_id()); + t->expires = round_jiffies(jiffies + *n); + add_timer(t); +} + +/* + * Called for each booted CPU to set up machine checks. + * Must be called with preempt off: + */ +void __cpuinit mcheck_init(struct cpuinfo_x86 *c) +{ + if (!mce_available(c)) + return; + + if (mce_cap_init() < 0) { + mce_dont_init = 1; + return; + } + mce_cpu_quirks(c); + + mce_init(NULL); + mce_cpu_features(c); + mce_init_timer(); +} + +/* + * Character device to read and clear the MCE log. + */ + +static DEFINE_SPINLOCK(mce_state_lock); +static int open_count; /* #times opened */ +static int open_exclu; /* already open exclusive? */ + +static int mce_open(struct inode *inode, struct file *file) +{ + lock_kernel(); + spin_lock(&mce_state_lock); + + if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { + spin_unlock(&mce_state_lock); + unlock_kernel(); + + return -EBUSY; + } + + if (file->f_flags & O_EXCL) + open_exclu = 1; + open_count++; + + spin_unlock(&mce_state_lock); + unlock_kernel(); + + return nonseekable_open(inode, file); +} + +static int mce_release(struct inode *inode, struct file *file) +{ + spin_lock(&mce_state_lock); + + open_count--; + open_exclu = 0; + + spin_unlock(&mce_state_lock); + + return 0; +} + +static void collect_tscs(void *data) +{ + unsigned long *cpu_tsc = (unsigned long *)data; + + rdtscll(cpu_tsc[smp_processor_id()]); +} + +static DEFINE_MUTEX(mce_read_mutex); + +static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, + loff_t *off) +{ + char __user *buf = ubuf; + unsigned long *cpu_tsc; + unsigned prev, next; + int i, err; + + cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); + if (!cpu_tsc) + return -ENOMEM; + + mutex_lock(&mce_read_mutex); + next = rcu_dereference(mcelog.next); + + /* Only supports full reads right now */ + if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { + mutex_unlock(&mce_read_mutex); + kfree(cpu_tsc); + + return -EINVAL; + } + + err = 0; + prev = 0; + do { + for (i = prev; i < next; i++) { + unsigned long start = jiffies; + + while (!mcelog.entry[i].finished) { + if (time_after_eq(jiffies, start + 2)) { + memset(mcelog.entry + i, 0, + sizeof(struct mce)); + goto timeout; + } + cpu_relax(); + } + smp_rmb(); + err |= copy_to_user(buf, mcelog.entry + i, + sizeof(struct mce)); + buf += sizeof(struct mce); +timeout: + ; + } + + memset(mcelog.entry + prev, 0, + (next - prev) * sizeof(struct mce)); + prev = next; + next = cmpxchg(&mcelog.next, prev, 0); + } while (next != prev); + + synchronize_sched(); + + /* + * Collect entries that were still getting written before the + * synchronize. + */ + on_each_cpu(collect_tscs, cpu_tsc, 1); + + for (i = next; i < MCE_LOG_LEN; i++) { + if (mcelog.entry[i].finished && + mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { + err |= copy_to_user(buf, mcelog.entry+i, + sizeof(struct mce)); + smp_rmb(); + buf += sizeof(struct mce); + memset(&mcelog.entry[i], 0, sizeof(struct mce)); + } + } + mutex_unlock(&mce_read_mutex); + kfree(cpu_tsc); + + return err ? -EFAULT : buf - ubuf; +} + +static unsigned int mce_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &mce_wait, wait); + if (rcu_dereference(mcelog.next)) + return POLLIN | POLLRDNORM; + return 0; +} + +static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + int __user *p = (int __user *)arg; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (cmd) { + case MCE_GET_RECORD_LEN: + return put_user(sizeof(struct mce), p); + case MCE_GET_LOG_LEN: + return put_user(MCE_LOG_LEN, p); + case MCE_GETCLEAR_FLAGS: { + unsigned flags; + + do { + flags = mcelog.flags; + } while (cmpxchg(&mcelog.flags, flags, 0) != flags); + + return put_user(flags, p); + } + default: + return -ENOTTY; + } +} + +static const struct file_operations mce_chrdev_ops = { + .open = mce_open, + .release = mce_release, + .read = mce_read, + .poll = mce_poll, + .unlocked_ioctl = mce_ioctl, +}; + +static struct miscdevice mce_log_device = { + MISC_MCELOG_MINOR, + "mcelog", + &mce_chrdev_ops, +}; + +/* + * Old style boot options parsing. Only for compatibility. + */ +static int __init mcheck_disable(char *str) +{ + mce_dont_init = 1; + return 1; +} +__setup("nomce", mcheck_disable); + +/* + * mce=off disables machine check + * mce=TOLERANCELEVEL (number, see above) + * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. + * mce=nobootlog Don't log MCEs from before booting. + */ +static int __init mcheck_enable(char *str) +{ + if (!strcmp(str, "off")) + mce_dont_init = 1; + else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) + mce_bootlog = (str[0] == 'b'); + else if (isdigit(str[0])) + get_option(&str, &tolerant); + else { + printk(KERN_INFO "mce= argument %s ignored. Please use /sys\n", + str); + return 0; + } + return 1; +} +__setup("mce=", mcheck_enable); + +/* + * Sysfs support + */ + +/* + * Disable machine checks on suspend and shutdown. We can't really handle + * them later. + */ +static int mce_disable(void) +{ + int i; + + for (i = 0; i < banks; i++) + wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); + return 0; +} + +static int mce_suspend(struct sys_device *dev, pm_message_t state) +{ + return mce_disable(); +} + +static int mce_shutdown(struct sys_device *dev) +{ + return mce_disable(); +} + +/* + * On resume clear all MCE state. Don't want to see leftovers from the BIOS. + * Only one CPU is active at this time, the others get re-added later using + * CPU hotplug: + */ +static int mce_resume(struct sys_device *dev) +{ + mce_init(NULL); + mce_cpu_features(¤t_cpu_data); + + return 0; +} + +static void mce_cpu_restart(void *data) +{ + del_timer_sync(&__get_cpu_var(mce_timer)); + if (mce_available(¤t_cpu_data)) + mce_init(NULL); + mce_init_timer(); +} + +/* Reinit MCEs after user configuration changes */ +static void mce_restart(void) +{ + on_each_cpu(mce_cpu_restart, NULL, 1); +} + +static struct sysdev_class mce_sysclass = { + .suspend = mce_suspend, + .shutdown = mce_shutdown, + .resume = mce_resume, + .name = "machinecheck", +}; + +DEFINE_PER_CPU(struct sys_device, device_mce); + +__cpuinitdata +void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); + +/* Why are there no generic functions for this? */ +#define ACCESSOR(name, var, start) \ + static ssize_t show_ ## name(struct sys_device *s, \ + struct sysdev_attribute *attr, \ + char *buf) { \ + return sprintf(buf, "%lx\n", (unsigned long)var); \ + } \ + static ssize_t set_ ## name(struct sys_device *s, \ + struct sysdev_attribute *attr, \ + const char *buf, size_t siz) { \ + char *end; \ + unsigned long new = simple_strtoul(buf, &end, 0); \ + \ + if (end == buf) \ + return -EINVAL; \ + var = new; \ + start; \ + \ + return end-buf; \ + } \ + static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); + +static struct sysdev_attribute *bank_attrs; + +static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, + char *buf) +{ + u64 b = bank[attr - bank_attrs]; + + return sprintf(buf, "%llx\n", b); +} + +static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, + const char *buf, size_t siz) +{ + char *end; + u64 new = simple_strtoull(buf, &end, 0); + + if (end == buf) + return -EINVAL; + + bank[attr - bank_attrs] = new; + mce_restart(); + + return end-buf; +} + +static ssize_t +show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) +{ + strcpy(buf, trigger); + strcat(buf, "\n"); + return strlen(trigger) + 1; +} + +static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, + const char *buf, size_t siz) +{ + char *p; + int len; + + strncpy(trigger, buf, sizeof(trigger)); + trigger[sizeof(trigger)-1] = 0; + len = strlen(trigger); + p = strchr(trigger, '\n'); + + if (*p) + *p = 0; + + return len; +} + +static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); +static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); + +ACCESSOR(check_interval, check_interval, mce_restart()) + +static struct sysdev_attribute *mce_attributes[] = { + &attr_tolerant.attr, &attr_check_interval, &attr_trigger, + NULL +}; + +static cpumask_var_t mce_device_initialized; + +/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ +static __cpuinit int mce_create_device(unsigned int cpu) +{ + int err; + int i; + + if (!mce_available(&boot_cpu_data)) + return -EIO; + + memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); + per_cpu(device_mce, cpu).id = cpu; + per_cpu(device_mce, cpu).cls = &mce_sysclass; + + err = sysdev_register(&per_cpu(device_mce, cpu)); + if (err) + return err; + + for (i = 0; mce_attributes[i]; i++) { + err = sysdev_create_file(&per_cpu(device_mce, cpu), + mce_attributes[i]); + if (err) + goto error; + } + for (i = 0; i < banks; i++) { + err = sysdev_create_file(&per_cpu(device_mce, cpu), + &bank_attrs[i]); + if (err) + goto error2; + } + cpumask_set_cpu(cpu, mce_device_initialized); + + return 0; +error2: + while (--i >= 0) { + sysdev_remove_file(&per_cpu(device_mce, cpu), + &bank_attrs[i]); + } +error: + while (--i >= 0) { + sysdev_remove_file(&per_cpu(device_mce, cpu), + mce_attributes[i]); + } + sysdev_unregister(&per_cpu(device_mce, cpu)); + + return err; +} + +static __cpuinit void mce_remove_device(unsigned int cpu) +{ + int i; + + if (!cpumask_test_cpu(cpu, mce_device_initialized)) + return; + + for (i = 0; mce_attributes[i]; i++) + sysdev_remove_file(&per_cpu(device_mce, cpu), + mce_attributes[i]); + for (i = 0; i < banks; i++) + sysdev_remove_file(&per_cpu(device_mce, cpu), + &bank_attrs[i]); + sysdev_unregister(&per_cpu(device_mce, cpu)); + cpumask_clear_cpu(cpu, mce_device_initialized); +} + +/* Make sure there are no machine checks on offlined CPUs. */ +static void mce_disable_cpu(void *h) +{ + int i; + unsigned long action = *(unsigned long *)h; + + if (!mce_available(¤t_cpu_data)) + return; + if (!(action & CPU_TASKS_FROZEN)) + cmci_clear(); + for (i = 0; i < banks; i++) + wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); +} + +static void mce_reenable_cpu(void *h) +{ + unsigned long action = *(unsigned long *)h; + int i; + + if (!mce_available(¤t_cpu_data)) + return; + + if (!(action & CPU_TASKS_FROZEN)) + cmci_reenable(); + for (i = 0; i < banks; i++) + wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); +} + +/* Get notified when a cpu comes on/off. Be hotplug friendly. */ +static int __cpuinit +mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct timer_list *t = &per_cpu(mce_timer, cpu); + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + mce_create_device(cpu); + if (threshold_cpu_callback) + threshold_cpu_callback(action, cpu); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + if (threshold_cpu_callback) + threshold_cpu_callback(action, cpu); + mce_remove_device(cpu); + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + del_timer_sync(t); + smp_call_function_single(cpu, mce_disable_cpu, &action, 1); + break; + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + t->expires = round_jiffies(jiffies + + __get_cpu_var(next_interval)); + add_timer_on(t, cpu); + smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); + break; + case CPU_POST_DEAD: + /* intentionally ignoring frozen here */ + cmci_rediscover(cpu); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block mce_cpu_notifier __cpuinitdata = { + .notifier_call = mce_cpu_callback, +}; + +static __init int mce_init_banks(void) +{ + int i; + + bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, + GFP_KERNEL); + if (!bank_attrs) + return -ENOMEM; + + for (i = 0; i < banks; i++) { + struct sysdev_attribute *a = &bank_attrs[i]; + + a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); + if (!a->attr.name) + goto nomem; + + a->attr.mode = 0644; + a->show = show_bank; + a->store = set_bank; + } + return 0; + +nomem: + while (--i >= 0) + kfree(bank_attrs[i].attr.name); + kfree(bank_attrs); + bank_attrs = NULL; + + return -ENOMEM; +} + +static __init int mce_init_device(void) +{ + int err; + int i = 0; + + if (!mce_available(&boot_cpu_data)) + return -EIO; + + alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); + + err = mce_init_banks(); + if (err) + return err; + + err = sysdev_class_register(&mce_sysclass); + if (err) + return err; + + for_each_online_cpu(i) { + err = mce_create_device(i); + if (err) + return err; + } + + register_hotcpu_notifier(&mce_cpu_notifier); + misc_register(&mce_log_device); + + return err; +} + +device_initcall(mce_init_device); + +#else /* CONFIG_X86_32: */ + +int mce_disabled; + +int nr_mce_banks; +EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ + +/* Handle unconfigured int18 (should never happen) */ +static void unexpected_machine_check(struct pt_regs *regs, long error_code) +{ + printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", + smp_processor_id()); +} + +/* Call the installed machine check handler for this CPU setup. */ +void (*machine_check_vector)(struct pt_regs *, long error_code) = + unexpected_machine_check; + +/* This has to be run for each processor */ +void mcheck_init(struct cpuinfo_x86 *c) +{ + if (mce_disabled == 1) + return; + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + amd_mcheck_init(c); + break; + + case X86_VENDOR_INTEL: + if (c->x86 == 5) + intel_p5_mcheck_init(c); + if (c->x86 == 6) + intel_p6_mcheck_init(c); + if (c->x86 == 15) + intel_p4_mcheck_init(c); + break; + + case X86_VENDOR_CENTAUR: + if (c->x86 == 5) + winchip_mcheck_init(c); + break; + + default: + break; + } +} + +static int __init mcheck_disable(char *str) +{ + mce_disabled = 1; + return 1; +} + +static int __init mcheck_enable(char *str) +{ + mce_disabled = -1; + return 1; +} + +__setup("nomce", mcheck_disable); +__setup("mce", mcheck_enable); + +#endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c deleted file mode 100644 index 05979e7eff12..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * mce.c - x86 Machine Check Exception Reporting - * (c) 2002 Alan Cox , Dave Jones - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -int mce_disabled; - -int nr_mce_banks; -EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ - -/* Handle unconfigured int18 (should never happen) */ -static void unexpected_machine_check(struct pt_regs *regs, long error_code) -{ - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", - smp_processor_id()); -} - -/* Call the installed machine check handler for this CPU setup. */ -void (*machine_check_vector)(struct pt_regs *, long error_code) = - unexpected_machine_check; - -/* This has to be run for each processor */ -void mcheck_init(struct cpuinfo_x86 *c) -{ - if (mce_disabled == 1) - return; - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - amd_mcheck_init(c); - break; - - case X86_VENDOR_INTEL: - if (c->x86 == 5) - intel_p5_mcheck_init(c); - if (c->x86 == 6) - intel_p6_mcheck_init(c); - if (c->x86 == 15) - intel_p4_mcheck_init(c); - break; - - case X86_VENDOR_CENTAUR: - if (c->x86 == 5) - winchip_mcheck_init(c); - break; - - default: - break; - } -} - -static int __init mcheck_disable(char *str) -{ - mce_disabled = 1; - return 1; -} - -static int __init mcheck_enable(char *str) -{ - mce_disabled = -1; - return 1; -} - -__setup("nomce", mcheck_disable); -__setup("mce", mcheck_enable); diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c deleted file mode 100644 index 2e2c3d2e9586..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ /dev/null @@ -1,1311 +0,0 @@ -/* - * Machine check handler. - * - * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. - * Rest from unknown author(s). - * 2004 Andi Kleen. Rewrote most of it. - * Copyright 2008 Intel Corporation - * Author: Andi Kleen - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "mce.h" - -#ifdef CONFIG_X86_64 - -#define MISC_MCELOG_MINOR 227 - -atomic_t mce_entry; - -static int mce_dont_init; - -/* - * Tolerant levels: - * 0: always panic on uncorrected errors, log corrected errors - * 1: panic or SIGBUS on uncorrected errors, log corrected errors - * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors - * 3: never panic or SIGBUS, log all errors (for testing only) - */ -static int tolerant = 1; -static int banks; -static u64 *bank; -static unsigned long notify_user; -static int rip_msr; -static int mce_bootlog = -1; -static atomic_t mce_events; - -static char trigger[128]; -static char *trigger_argv[2] = { trigger, NULL }; - -static DECLARE_WAIT_QUEUE_HEAD(mce_wait); - -/* MCA banks polled by the period polling timer for corrected events */ -DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { - [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL -}; - -/* Do initial initialization of a struct mce */ -void mce_setup(struct mce *m) -{ - memset(m, 0, sizeof(struct mce)); - m->cpu = smp_processor_id(); - rdtscll(m->tsc); -} - -/* - * Lockless MCE logging infrastructure. - * This avoids deadlocks on printk locks without having to break locks. Also - * separate MCEs from kernel messages to avoid bogus bug reports. - */ - -static struct mce_log mcelog = { - MCE_LOG_SIGNATURE, - MCE_LOG_LEN, -}; - -void mce_log(struct mce *mce) -{ - unsigned next, entry; - - atomic_inc(&mce_events); - mce->finished = 0; - wmb(); - for (;;) { - entry = rcu_dereference(mcelog.next); - for (;;) { - /* - * When the buffer fills up discard new entries. - * Assume that the earlier errors are the more - * interesting ones: - */ - if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); - return; - } - /* Old left over entry. Skip: */ - if (mcelog.entry[entry].finished) { - entry++; - continue; - } - break; - } - smp_rmb(); - next = entry + 1; - if (cmpxchg(&mcelog.next, entry, next) == entry) - break; - } - memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); - wmb(); - mcelog.entry[entry].finished = 1; - wmb(); - - set_bit(0, ¬ify_user); -} - -static void print_mce(struct mce *m) -{ - printk(KERN_EMERG "\n" - KERN_EMERG "HARDWARE ERROR\n" - KERN_EMERG - "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", - m->cpu, m->mcgstatus, m->bank, m->status); - if (m->ip) { - printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", - !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); - if (m->cs == __KERNEL_CS) - print_symbol("{%s}", m->ip); - printk("\n"); - } - printk(KERN_EMERG "TSC %llx ", m->tsc); - if (m->addr) - printk("ADDR %llx ", m->addr); - if (m->misc) - printk("MISC %llx ", m->misc); - printk("\n"); - printk(KERN_EMERG "This is not a software problem!\n"); - printk(KERN_EMERG "Run through mcelog --ascii to decode " - "and contact your hardware vendor\n"); -} - -static void mce_panic(char *msg, struct mce *backup, unsigned long start) -{ - int i; - - oops_begin(); - for (i = 0; i < MCE_LOG_LEN; i++) { - unsigned long tsc = mcelog.entry[i].tsc; - - if (time_before(tsc, start)) - continue; - print_mce(&mcelog.entry[i]); - if (backup && mcelog.entry[i].tsc == backup->tsc) - backup = NULL; - } - if (backup) - print_mce(backup); - panic(msg); -} - -int mce_available(struct cpuinfo_x86 *c) -{ - if (mce_dont_init) - return 0; - return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); -} - -static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) -{ - if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { - m->ip = regs->ip; - m->cs = regs->cs; - } else { - m->ip = 0; - m->cs = 0; - } - if (rip_msr) { - /* Assume the RIP in the MSR is exact. Is this true? */ - m->mcgstatus |= MCG_STATUS_EIPV; - rdmsrl(rip_msr, m->ip); - m->cs = 0; - } -} - -/* - * Poll for corrected events or events that happened before reset. - * Those are just logged through /dev/mcelog. - * - * This is executed in standard interrupt context. - */ -void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) -{ - struct mce m; - int i; - - mce_setup(&m); - - rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); - for (i = 0; i < banks; i++) { - if (!bank[i] || !test_bit(i, *b)) - continue; - - m.misc = 0; - m.addr = 0; - m.bank = i; - m.tsc = 0; - - barrier(); - rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); - if (!(m.status & MCI_STATUS_VAL)) - continue; - - /* - * Uncorrected events are handled by the exception handler - * when it is enabled. But when the exception is disabled log - * everything. - * - * TBD do the same check for MCI_STATUS_EN here? - */ - if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) - continue; - - if (m.status & MCI_STATUS_MISCV) - rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); - if (m.status & MCI_STATUS_ADDRV) - rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); - - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; - /* - * Don't get the IP here because it's unlikely to - * have anything to do with the actual error location. - */ - if (!(flags & MCP_DONTLOG)) { - mce_log(&m); - add_taint(TAINT_MACHINE_CHECK); - } - - /* - * Clear state for this bank. - */ - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } - - /* - * Don't clear MCG_STATUS here because it's only defined for - * exceptions. - */ -} - -/* - * The actual machine check handler. This only handles real - * exceptions when something got corrupted coming in through int 18. - * - * This is executed in NMI context not subject to normal locking rules. This - * implies that most kernel services cannot be safely used. Don't even - * think about putting a printk in there! - */ -void do_machine_check(struct pt_regs *regs, long error_code) -{ - struct mce m, panicm; - int panicm_found = 0; - u64 mcestart = 0; - int i; - /* - * If no_way_out gets set, there is no safe way to recover from this - * MCE. If tolerant is cranked up, we'll try anyway. - */ - int no_way_out = 0; - /* - * If kill_it gets set, there might be a way to recover from this - * error. - */ - int kill_it = 0; - DECLARE_BITMAP(toclear, MAX_NR_BANKS); - - atomic_inc(&mce_entry); - - if (notify_die(DIE_NMI, "machine check", regs, error_code, - 18, SIGKILL) == NOTIFY_STOP) - goto out2; - if (!banks) - goto out2; - - mce_setup(&m); - - rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); - - /* if the restart IP is not valid, we're done for */ - if (!(m.mcgstatus & MCG_STATUS_RIPV)) - no_way_out = 1; - - rdtscll(mcestart); - barrier(); - - for (i = 0; i < banks; i++) { - __clear_bit(i, toclear); - if (!bank[i]) - continue; - - m.misc = 0; - m.addr = 0; - m.bank = i; - - rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); - if ((m.status & MCI_STATUS_VAL) == 0) - continue; - - /* - * Non uncorrected errors are handled by machine_check_poll - * Leave them alone. - */ - if ((m.status & MCI_STATUS_UC) == 0) - continue; - - /* - * Set taint even when machine check was not enabled. - */ - add_taint(TAINT_MACHINE_CHECK); - - __set_bit(i, toclear); - - if (m.status & MCI_STATUS_EN) { - /* if PCC was set, there's no way out */ - no_way_out |= !!(m.status & MCI_STATUS_PCC); - /* - * If this error was uncorrectable and there was - * an overflow, we're in trouble. If no overflow, - * we might get away with just killing a task. - */ - if (m.status & MCI_STATUS_UC) { - if (tolerant < 1 || m.status & MCI_STATUS_OVER) - no_way_out = 1; - kill_it = 1; - } - } else { - /* - * Machine check event was not enabled. Clear, but - * ignore. - */ - continue; - } - - if (m.status & MCI_STATUS_MISCV) - rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); - if (m.status & MCI_STATUS_ADDRV) - rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); - - mce_get_rip(&m, regs); - mce_log(&m); - - /* - * Did this bank cause the exception? - * - * Assume that the bank with uncorrectable errors did it, - * and that there is only a single one: - */ - if ((m.status & MCI_STATUS_UC) && - (m.status & MCI_STATUS_EN)) { - panicm = m; - panicm_found = 1; - } - } - - /* - * If we didn't find an uncorrectable error, pick - * the last one (shouldn't happen, just being safe). - */ - if (!panicm_found) - panicm = m; - - /* - * If we have decided that we just CAN'T continue, and the user - * has not set tolerant to an insane level, give up and die. - */ - if (no_way_out && tolerant < 3) - mce_panic("Machine check", &panicm, mcestart); - - /* - * If the error seems to be unrecoverable, something should be - * done. Try to kill as little as possible. If we can kill just - * one task, do that. If the user has set the tolerance very - * high, don't try to do anything at all. - */ - if (kill_it && tolerant < 3) { - int user_space = 0; - - /* - * If the EIPV bit is set, it means the saved IP is the - * instruction which caused the MCE. - */ - if (m.mcgstatus & MCG_STATUS_EIPV) - user_space = panicm.ip && (panicm.cs & 3); - - /* - * If we know that the error was in user space, send a - * SIGBUS. Otherwise, panic if tolerance is low. - * - * force_sig() takes an awful lot of locks and has a slight - * risk of deadlocking. - */ - if (user_space) { - force_sig(SIGBUS, current); - } else if (panic_on_oops || tolerant < 2) { - mce_panic("Uncorrected machine check", - &panicm, mcestart); - } - } - - /* notify userspace ASAP */ - set_thread_flag(TIF_MCE_NOTIFY); - - /* the last thing we do is clear state */ - for (i = 0; i < banks; i++) { - if (test_bit(i, toclear)) - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } - wrmsrl(MSR_IA32_MCG_STATUS, 0); - out2: - atomic_dec(&mce_entry); -} - -#ifdef CONFIG_X86_MCE_INTEL -/*** - * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog - * @cpu: The CPU on which the event occurred. - * @status: Event status information - * - * This function should be called by the thermal interrupt after the - * event has been processed and the decision was made to log the event - * further. - * - * The status parameter will be saved to the 'status' field of 'struct mce' - * and historically has been the register value of the - * MSR_IA32_THERMAL_STATUS (Intel) msr. - */ -void mce_log_therm_throt_event(__u64 status) -{ - struct mce m; - - mce_setup(&m); - m.bank = MCE_THERMAL_BANK; - m.status = status; - mce_log(&m); -} -#endif /* CONFIG_X86_MCE_INTEL */ - -/* - * Periodic polling timer for "silent" machine check errors. If the - * poller finds an MCE, poll 2x faster. When the poller finds no more - * errors, poll 2x slower (up to check_interval seconds). - */ -static int check_interval = 5 * 60; /* 5 minutes */ - -static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ -static DEFINE_PER_CPU(struct timer_list, mce_timer); - -static void mcheck_timer(unsigned long data) -{ - struct timer_list *t = &per_cpu(mce_timer, data); - int *n; - - WARN_ON(smp_processor_id() != data); - - if (mce_available(¤t_cpu_data)) { - machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); - } - - /* - * Alert userspace if needed. If we logged an MCE, reduce the - * polling interval, otherwise increase the polling interval. - */ - n = &__get_cpu_var(next_interval); - if (mce_notify_user()) { - *n = max(*n/2, HZ/100); - } else { - *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); - } - - t->expires = jiffies + *n; - add_timer(t); -} - -static void mce_do_trigger(struct work_struct *work) -{ - call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); -} - -static DECLARE_WORK(mce_trigger_work, mce_do_trigger); - -/* - * Notify the user(s) about new machine check events. - * Can be called from interrupt context, but not from machine check/NMI - * context. - */ -int mce_notify_user(void) -{ - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - - clear_thread_flag(TIF_MCE_NOTIFY); - - if (test_and_clear_bit(0, ¬ify_user)) { - wake_up_interruptible(&mce_wait); - - /* - * There is no risk of missing notifications because - * work_pending is always cleared before the function is - * executed. - */ - if (trigger[0] && !work_pending(&mce_trigger_work)) - schedule_work(&mce_trigger_work); - - if (__ratelimit(&ratelimit)) - printk(KERN_INFO "Machine check events logged\n"); - - return 1; - } - return 0; -} - -/* see if the idle task needs to notify userspace: */ -static int -mce_idle_callback(struct notifier_block *nfb, unsigned long action, - void *unused) -{ - /* IDLE_END should be safe - interrupts are back on */ - if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) - mce_notify_user(); - - return NOTIFY_OK; -} - -static struct notifier_block mce_idle_notifier = { - .notifier_call = mce_idle_callback, -}; - -static __init int periodic_mcheck_init(void) -{ - idle_notifier_register(&mce_idle_notifier); - return 0; -} -__initcall(periodic_mcheck_init); - -/* - * Initialize Machine Checks for a CPU. - */ -static int mce_cap_init(void) -{ - unsigned b; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - b = cap & 0xff; - if (b > MAX_NR_BANKS) { - printk(KERN_WARNING - "MCE: Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); - b = MAX_NR_BANKS; - } - - /* Don't support asymmetric configurations today */ - WARN_ON(banks != 0 && b != banks); - banks = b; - if (!bank) { - bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); - if (!bank) - return -ENOMEM; - memset(bank, 0xff, banks * sizeof(u64)); - } - - /* Use accurate RIP reporting if available. */ - if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) - rip_msr = MSR_IA32_MCG_EIP; - - return 0; -} - -static void mce_init(void *dummy) -{ - mce_banks_t all_banks; - u64 cap; - int i; - - /* - * Log the machine checks left over from the previous reset. - */ - bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); - - set_in_cr4(X86_CR4_MCE); - - rdmsrl(MSR_IA32_MCG_CAP, cap); - if (cap & MCG_CTL_P) - wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); - - for (i = 0; i < banks; i++) { - wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } -} - -/* Add per CPU specific workarounds here */ -static void mce_cpu_quirks(struct cpuinfo_x86 *c) -{ - /* This should be disabled by the BIOS, but isn't always */ - if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && banks > 4) { - /* - * disable GART TBL walk error reporting, which - * trips off incorrectly with the IOMMU & 3ware - * & Cerberus: - */ - clear_bit(10, (unsigned long *)&bank[4]); - } - if (c->x86 <= 17 && mce_bootlog < 0) { - /* - * Lots of broken BIOS around that don't clear them - * by default and leave crap in there. Don't log: - */ - mce_bootlog = 0; - } - } - -} - -static void mce_cpu_features(struct cpuinfo_x86 *c) -{ - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - mce_intel_feature_init(c); - break; - case X86_VENDOR_AMD: - mce_amd_feature_init(c); - break; - default: - break; - } -} - -static void mce_init_timer(void) -{ - struct timer_list *t = &__get_cpu_var(mce_timer); - int *n = &__get_cpu_var(next_interval); - - *n = check_interval * HZ; - if (!*n) - return; - setup_timer(t, mcheck_timer, smp_processor_id()); - t->expires = round_jiffies(jiffies + *n); - add_timer(t); -} - -/* - * Called for each booted CPU to set up machine checks. - * Must be called with preempt off: - */ -void __cpuinit mcheck_init(struct cpuinfo_x86 *c) -{ - if (!mce_available(c)) - return; - - if (mce_cap_init() < 0) { - mce_dont_init = 1; - return; - } - mce_cpu_quirks(c); - - mce_init(NULL); - mce_cpu_features(c); - mce_init_timer(); -} - -/* - * Character device to read and clear the MCE log. - */ - -static DEFINE_SPINLOCK(mce_state_lock); -static int open_count; /* #times opened */ -static int open_exclu; /* already open exclusive? */ - -static int mce_open(struct inode *inode, struct file *file) -{ - lock_kernel(); - spin_lock(&mce_state_lock); - - if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { - spin_unlock(&mce_state_lock); - unlock_kernel(); - - return -EBUSY; - } - - if (file->f_flags & O_EXCL) - open_exclu = 1; - open_count++; - - spin_unlock(&mce_state_lock); - unlock_kernel(); - - return nonseekable_open(inode, file); -} - -static int mce_release(struct inode *inode, struct file *file) -{ - spin_lock(&mce_state_lock); - - open_count--; - open_exclu = 0; - - spin_unlock(&mce_state_lock); - - return 0; -} - -static void collect_tscs(void *data) -{ - unsigned long *cpu_tsc = (unsigned long *)data; - - rdtscll(cpu_tsc[smp_processor_id()]); -} - -static DEFINE_MUTEX(mce_read_mutex); - -static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, - loff_t *off) -{ - char __user *buf = ubuf; - unsigned long *cpu_tsc; - unsigned prev, next; - int i, err; - - cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); - if (!cpu_tsc) - return -ENOMEM; - - mutex_lock(&mce_read_mutex); - next = rcu_dereference(mcelog.next); - - /* Only supports full reads right now */ - if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { - mutex_unlock(&mce_read_mutex); - kfree(cpu_tsc); - - return -EINVAL; - } - - err = 0; - prev = 0; - do { - for (i = prev; i < next; i++) { - unsigned long start = jiffies; - - while (!mcelog.entry[i].finished) { - if (time_after_eq(jiffies, start + 2)) { - memset(mcelog.entry + i, 0, - sizeof(struct mce)); - goto timeout; - } - cpu_relax(); - } - smp_rmb(); - err |= copy_to_user(buf, mcelog.entry + i, - sizeof(struct mce)); - buf += sizeof(struct mce); -timeout: - ; - } - - memset(mcelog.entry + prev, 0, - (next - prev) * sizeof(struct mce)); - prev = next; - next = cmpxchg(&mcelog.next, prev, 0); - } while (next != prev); - - synchronize_sched(); - - /* - * Collect entries that were still getting written before the - * synchronize. - */ - on_each_cpu(collect_tscs, cpu_tsc, 1); - - for (i = next; i < MCE_LOG_LEN; i++) { - if (mcelog.entry[i].finished && - mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { - err |= copy_to_user(buf, mcelog.entry+i, - sizeof(struct mce)); - smp_rmb(); - buf += sizeof(struct mce); - memset(&mcelog.entry[i], 0, sizeof(struct mce)); - } - } - mutex_unlock(&mce_read_mutex); - kfree(cpu_tsc); - - return err ? -EFAULT : buf - ubuf; -} - -static unsigned int mce_poll(struct file *file, poll_table *wait) -{ - poll_wait(file, &mce_wait, wait); - if (rcu_dereference(mcelog.next)) - return POLLIN | POLLRDNORM; - return 0; -} - -static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) -{ - int __user *p = (int __user *)arg; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - switch (cmd) { - case MCE_GET_RECORD_LEN: - return put_user(sizeof(struct mce), p); - case MCE_GET_LOG_LEN: - return put_user(MCE_LOG_LEN, p); - case MCE_GETCLEAR_FLAGS: { - unsigned flags; - - do { - flags = mcelog.flags; - } while (cmpxchg(&mcelog.flags, flags, 0) != flags); - - return put_user(flags, p); - } - default: - return -ENOTTY; - } -} - -static const struct file_operations mce_chrdev_ops = { - .open = mce_open, - .release = mce_release, - .read = mce_read, - .poll = mce_poll, - .unlocked_ioctl = mce_ioctl, -}; - -static struct miscdevice mce_log_device = { - MISC_MCELOG_MINOR, - "mcelog", - &mce_chrdev_ops, -}; - -/* - * Old style boot options parsing. Only for compatibility. - */ -static int __init mcheck_disable(char *str) -{ - mce_dont_init = 1; - return 1; -} -__setup("nomce", mcheck_disable); - -/* - * mce=off disables machine check - * mce=TOLERANCELEVEL (number, see above) - * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. - * mce=nobootlog Don't log MCEs from before booting. - */ -static int __init mcheck_enable(char *str) -{ - if (!strcmp(str, "off")) - mce_dont_init = 1; - else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) - mce_bootlog = (str[0] == 'b'); - else if (isdigit(str[0])) - get_option(&str, &tolerant); - else { - printk(KERN_INFO "mce= argument %s ignored. Please use /sys\n", - str); - return 0; - } - return 1; -} -__setup("mce=", mcheck_enable); - -/* - * Sysfs support - */ - -/* - * Disable machine checks on suspend and shutdown. We can't really handle - * them later. - */ -static int mce_disable(void) -{ - int i; - - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); - return 0; -} - -static int mce_suspend(struct sys_device *dev, pm_message_t state) -{ - return mce_disable(); -} - -static int mce_shutdown(struct sys_device *dev) -{ - return mce_disable(); -} - -/* - * On resume clear all MCE state. Don't want to see leftovers from the BIOS. - * Only one CPU is active at this time, the others get re-added later using - * CPU hotplug: - */ -static int mce_resume(struct sys_device *dev) -{ - mce_init(NULL); - mce_cpu_features(¤t_cpu_data); - - return 0; -} - -static void mce_cpu_restart(void *data) -{ - del_timer_sync(&__get_cpu_var(mce_timer)); - if (mce_available(¤t_cpu_data)) - mce_init(NULL); - mce_init_timer(); -} - -/* Reinit MCEs after user configuration changes */ -static void mce_restart(void) -{ - on_each_cpu(mce_cpu_restart, NULL, 1); -} - -static struct sysdev_class mce_sysclass = { - .suspend = mce_suspend, - .shutdown = mce_shutdown, - .resume = mce_resume, - .name = "machinecheck", -}; - -DEFINE_PER_CPU(struct sys_device, device_mce); - -__cpuinitdata -void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); - -/* Why are there no generic functions for this? */ -#define ACCESSOR(name, var, start) \ - static ssize_t show_ ## name(struct sys_device *s, \ - struct sysdev_attribute *attr, \ - char *buf) { \ - return sprintf(buf, "%lx\n", (unsigned long)var); \ - } \ - static ssize_t set_ ## name(struct sys_device *s, \ - struct sysdev_attribute *attr, \ - const char *buf, size_t siz) { \ - char *end; \ - unsigned long new = simple_strtoul(buf, &end, 0); \ - \ - if (end == buf) \ - return -EINVAL; \ - var = new; \ - start; \ - \ - return end-buf; \ - } \ - static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); - -static struct sysdev_attribute *bank_attrs; - -static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, - char *buf) -{ - u64 b = bank[attr - bank_attrs]; - - return sprintf(buf, "%llx\n", b); -} - -static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, - const char *buf, size_t siz) -{ - char *end; - u64 new = simple_strtoull(buf, &end, 0); - - if (end == buf) - return -EINVAL; - - bank[attr - bank_attrs] = new; - mce_restart(); - - return end-buf; -} - -static ssize_t -show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) -{ - strcpy(buf, trigger); - strcat(buf, "\n"); - return strlen(trigger) + 1; -} - -static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, - const char *buf, size_t siz) -{ - char *p; - int len; - - strncpy(trigger, buf, sizeof(trigger)); - trigger[sizeof(trigger)-1] = 0; - len = strlen(trigger); - p = strchr(trigger, '\n'); - - if (*p) - *p = 0; - - return len; -} - -static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); -static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); - -ACCESSOR(check_interval, check_interval, mce_restart()) - -static struct sysdev_attribute *mce_attributes[] = { - &attr_tolerant.attr, &attr_check_interval, &attr_trigger, - NULL -}; - -static cpumask_var_t mce_device_initialized; - -/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ -static __cpuinit int mce_create_device(unsigned int cpu) -{ - int err; - int i; - - if (!mce_available(&boot_cpu_data)) - return -EIO; - - memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); - per_cpu(device_mce, cpu).id = cpu; - per_cpu(device_mce, cpu).cls = &mce_sysclass; - - err = sysdev_register(&per_cpu(device_mce, cpu)); - if (err) - return err; - - for (i = 0; mce_attributes[i]; i++) { - err = sysdev_create_file(&per_cpu(device_mce, cpu), - mce_attributes[i]); - if (err) - goto error; - } - for (i = 0; i < banks; i++) { - err = sysdev_create_file(&per_cpu(device_mce, cpu), - &bank_attrs[i]); - if (err) - goto error2; - } - cpumask_set_cpu(cpu, mce_device_initialized); - - return 0; -error2: - while (--i >= 0) { - sysdev_remove_file(&per_cpu(device_mce, cpu), - &bank_attrs[i]); - } -error: - while (--i >= 0) { - sysdev_remove_file(&per_cpu(device_mce, cpu), - mce_attributes[i]); - } - sysdev_unregister(&per_cpu(device_mce, cpu)); - - return err; -} - -static __cpuinit void mce_remove_device(unsigned int cpu) -{ - int i; - - if (!cpumask_test_cpu(cpu, mce_device_initialized)) - return; - - for (i = 0; mce_attributes[i]; i++) - sysdev_remove_file(&per_cpu(device_mce, cpu), - mce_attributes[i]); - for (i = 0; i < banks; i++) - sysdev_remove_file(&per_cpu(device_mce, cpu), - &bank_attrs[i]); - sysdev_unregister(&per_cpu(device_mce, cpu)); - cpumask_clear_cpu(cpu, mce_device_initialized); -} - -/* Make sure there are no machine checks on offlined CPUs. */ -static void mce_disable_cpu(void *h) -{ - int i; - unsigned long action = *(unsigned long *)h; - - if (!mce_available(¤t_cpu_data)) - return; - if (!(action & CPU_TASKS_FROZEN)) - cmci_clear(); - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); -} - -static void mce_reenable_cpu(void *h) -{ - unsigned long action = *(unsigned long *)h; - int i; - - if (!mce_available(¤t_cpu_data)) - return; - - if (!(action & CPU_TASKS_FROZEN)) - cmci_reenable(); - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); -} - -/* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static int __cpuinit -mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct timer_list *t = &per_cpu(mce_timer, cpu); - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - mce_create_device(cpu); - if (threshold_cpu_callback) - threshold_cpu_callback(action, cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - if (threshold_cpu_callback) - threshold_cpu_callback(action, cpu); - mce_remove_device(cpu); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - del_timer_sync(t); - smp_call_function_single(cpu, mce_disable_cpu, &action, 1); - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - t->expires = round_jiffies(jiffies + - __get_cpu_var(next_interval)); - add_timer_on(t, cpu); - smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); - break; - case CPU_POST_DEAD: - /* intentionally ignoring frozen here */ - cmci_rediscover(cpu); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block mce_cpu_notifier __cpuinitdata = { - .notifier_call = mce_cpu_callback, -}; - -static __init int mce_init_banks(void) -{ - int i; - - bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, - GFP_KERNEL); - if (!bank_attrs) - return -ENOMEM; - - for (i = 0; i < banks; i++) { - struct sysdev_attribute *a = &bank_attrs[i]; - - a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); - if (!a->attr.name) - goto nomem; - - a->attr.mode = 0644; - a->show = show_bank; - a->store = set_bank; - } - return 0; - -nomem: - while (--i >= 0) - kfree(bank_attrs[i].attr.name); - kfree(bank_attrs); - bank_attrs = NULL; - - return -ENOMEM; -} - -static __init int mce_init_device(void) -{ - int err; - int i = 0; - - if (!mce_available(&boot_cpu_data)) - return -EIO; - - alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); - - err = mce_init_banks(); - if (err) - return err; - - err = sysdev_class_register(&mce_sysclass); - if (err) - return err; - - for_each_online_cpu(i) { - err = mce_create_device(i); - if (err) - return err; - } - - register_hotcpu_notifier(&mce_cpu_notifier); - misc_register(&mce_log_device); - - return err; -} - -device_initcall(mce_init_device); - -#else /* CONFIG_X86_32: */ - -int mce_disabled; - -int nr_mce_banks; -EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ - -/* Handle unconfigured int18 (should never happen) */ -static void unexpected_machine_check(struct pt_regs *regs, long error_code) -{ - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", - smp_processor_id()); -} - -/* Call the installed machine check handler for this CPU setup. */ -void (*machine_check_vector)(struct pt_regs *, long error_code) = - unexpected_machine_check; - -/* This has to be run for each processor */ -void mcheck_init(struct cpuinfo_x86 *c) -{ - if (mce_disabled == 1) - return; - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - amd_mcheck_init(c); - break; - - case X86_VENDOR_INTEL: - if (c->x86 == 5) - intel_p5_mcheck_init(c); - if (c->x86 == 6) - intel_p6_mcheck_init(c); - if (c->x86 == 15) - intel_p4_mcheck_init(c); - break; - - case X86_VENDOR_CENTAUR: - if (c->x86 == 5) - winchip_mcheck_init(c); - break; - - default: - break; - } -} - -static int __init mcheck_disable(char *str) -{ - mce_disabled = 1; - return 1; -} - -static int __init mcheck_enable(char *str) -{ - mce_disabled = -1; - return 1; -} - -__setup("nomce", mcheck_disable); -__setup("mce", mcheck_enable); - -#endif /* CONFIG_X86_32 */ -- cgit v1.2.2 From cb491fca55e5282f0a95ef39c55352e00d6ca75e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:17 +0200 Subject: x86, mce: Rename sysfs variables Shorten variable names. This also compacts the code a bit. device_mce => mce_dev mce_device_initialized => mce_dev_initialized mce_attribute => mce_attrs [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 58 +++++++++++++++------------------ arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 10 +++--- 3 files changed, 33 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 8488210b866f..b9972a6bc2a1 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -90,7 +90,7 @@ extern int mce_disabled; void mce_setup(struct mce *m); void mce_log(struct mce *m); -DECLARE_PER_CPU(struct sys_device, device_mce); +DECLARE_PER_CPU(struct sys_device, mce_dev); extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); /* diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2e2c3d2e9586..ba8dd41a10dc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -957,7 +957,7 @@ static struct sysdev_class mce_sysclass = { .name = "machinecheck", }; -DEFINE_PER_CPU(struct sys_device, device_mce); +DEFINE_PER_CPU(struct sys_device, mce_dev); __cpuinitdata void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); @@ -1039,12 +1039,12 @@ static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); ACCESSOR(check_interval, check_interval, mce_restart()) -static struct sysdev_attribute *mce_attributes[] = { +static struct sysdev_attribute *mce_attrs[] = { &attr_tolerant.attr, &attr_check_interval, &attr_trigger, NULL }; -static cpumask_var_t mce_device_initialized; +static cpumask_var_t mce_dev_initialized; /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ static __cpuinit int mce_create_device(unsigned int cpu) @@ -1055,40 +1055,36 @@ static __cpuinit int mce_create_device(unsigned int cpu) if (!mce_available(&boot_cpu_data)) return -EIO; - memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); - per_cpu(device_mce, cpu).id = cpu; - per_cpu(device_mce, cpu).cls = &mce_sysclass; + memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject)); + per_cpu(mce_dev, cpu).id = cpu; + per_cpu(mce_dev, cpu).cls = &mce_sysclass; - err = sysdev_register(&per_cpu(device_mce, cpu)); + err = sysdev_register(&per_cpu(mce_dev, cpu)); if (err) return err; - for (i = 0; mce_attributes[i]; i++) { - err = sysdev_create_file(&per_cpu(device_mce, cpu), - mce_attributes[i]); + for (i = 0; mce_attrs[i]; i++) { + err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); if (err) goto error; } for (i = 0; i < banks; i++) { - err = sysdev_create_file(&per_cpu(device_mce, cpu), + err = sysdev_create_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); if (err) goto error2; } - cpumask_set_cpu(cpu, mce_device_initialized); + cpumask_set_cpu(cpu, mce_dev_initialized); return 0; error2: - while (--i >= 0) { - sysdev_remove_file(&per_cpu(device_mce, cpu), - &bank_attrs[i]); - } + while (--i >= 0) + sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); error: - while (--i >= 0) { - sysdev_remove_file(&per_cpu(device_mce, cpu), - mce_attributes[i]); - } - sysdev_unregister(&per_cpu(device_mce, cpu)); + while (--i >= 0) + sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); + + sysdev_unregister(&per_cpu(mce_dev, cpu)); return err; } @@ -1097,24 +1093,24 @@ static __cpuinit void mce_remove_device(unsigned int cpu) { int i; - if (!cpumask_test_cpu(cpu, mce_device_initialized)) + if (!cpumask_test_cpu(cpu, mce_dev_initialized)) return; - for (i = 0; mce_attributes[i]; i++) - sysdev_remove_file(&per_cpu(device_mce, cpu), - mce_attributes[i]); + for (i = 0; mce_attrs[i]; i++) + sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); + for (i = 0; i < banks; i++) - sysdev_remove_file(&per_cpu(device_mce, cpu), - &bank_attrs[i]); - sysdev_unregister(&per_cpu(device_mce, cpu)); - cpumask_clear_cpu(cpu, mce_device_initialized); + sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); + + sysdev_unregister(&per_cpu(mce_dev, cpu)); + cpumask_clear_cpu(cpu, mce_dev_initialized); } /* Make sure there are no machine checks on offlined CPUs. */ static void mce_disable_cpu(void *h) { - int i; unsigned long action = *(unsigned long *)h; + int i; if (!mce_available(¤t_cpu_data)) return; @@ -1221,7 +1217,7 @@ static __init int mce_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; - alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); + alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); err = mce_init_banks(); if (err) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 4d90ec3eb51d..083f270251fa 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -517,7 +517,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (!b) goto out; - err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, + err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj, b->kobj, name); if (err) goto out; @@ -540,7 +540,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out; } - b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); + b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj); if (!b->kobj) goto out_free; @@ -560,7 +560,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (i == cpu) continue; - err = sysfs_create_link(&per_cpu(device_mce, i).kobj, + err = sysfs_create_link(&per_cpu(mce_dev, i).kobj, b->kobj, name); if (err) goto out; @@ -638,7 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #ifdef CONFIG_SMP /* sibling symlink */ if (shared_bank[bank] && b->blocks->cpu != cpu) { - sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); + sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name); per_cpu(threshold_banks, cpu)[bank] = NULL; return; @@ -650,7 +650,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) if (i == cpu) continue; - sysfs_remove_link(&per_cpu(device_mce, i).kobj, name); + sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name); per_cpu(threshold_banks, i)[bank] = NULL; } -- cgit v1.2.2 From b659294b779565c60f5e12ef505328e2b974eb62 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 12:31:27 +0200 Subject: x86, mce: print number of MCE banks The number of MCE banks supported by a CPU is a useful number to know, so print it out during CPU initialization. [ Impact: add printout ] Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ba8dd41a10dc..49c74222359d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -570,6 +570,8 @@ static int mce_cap_init(void) rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & 0xff; + printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); + if (b > MAX_NR_BANKS) { printk(KERN_WARNING "MCE: Using only %u machine check banks out of %u\n", @@ -1287,6 +1289,7 @@ void mcheck_init(struct cpuinfo_x86 *c) default: break; } + printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); } static int __init mcheck_disable(char *str) -- cgit v1.2.2 From ba2d0f2b0c56d7174a0208f7c463271f39040728 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Apr 2009 12:31:24 +0200 Subject: x86, mce: Cleanup symbols in intel thermal codes Decode magic constants and turn them into symbols. [ Cleanup to use symbols already exists - HS ] [ Impact: cleanup ] Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr-index.h | 7 +++++++ arch/x86/kernel/cpu/mcheck/mce_intel.c | 9 +++++---- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 +- arch/x86/kernel/cpu/mcheck/p4.c | 2 +- 4 files changed, 14 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ec41fc16c167..c86404695083 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -208,7 +208,14 @@ #define MSR_IA32_THERM_CONTROL 0x0000019a #define MSR_IA32_THERM_INTERRUPT 0x0000019b + +#define THERM_INT_LOW_ENABLE (1 << 0) +#define THERM_INT_HIGH_ENABLE (1 << 1) + #define MSR_IA32_THERM_STATUS 0x0000019c + +#define THERM_STATUS_PROCHOT (1 << 0) + #define MSR_IA32_MISC_ENABLE 0x000001a0 /* MISC_ENABLE bits: architectural */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index bad3cbb0e566..2b011d2d8579 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -32,13 +32,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c) */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); h = apic_read(APIC_LVTTHMR); - if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); return; } - if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) + if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) tm2 = 1; /* Check whether a vector already exists */ @@ -54,12 +54,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c) apic_write(APIC_LVTTHMR, h); rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); + wrmsr(MSR_IA32_THERM_INTERRUPT, + l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); intel_set_thermal_handler(); rdmsr(MSR_IA32_MISC_ENABLE, l, h); - wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); + wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); /* Unmask the thermal vector: */ l = apic_read(APIC_LVTTHMR); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 38f9632306fa..13abafcb72e4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -29,7 +29,7 @@ asmlinkage void smp_thermal_interrupt(void) irq_enter(); rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - if (therm_throt_process(msr_val & 1)) + if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) mce_log_therm_throt_event(msr_val); inc_irq_stat(irq_thermal_count); diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index f979ffea330b..82cee108a2d3 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -51,7 +51,7 @@ static void intel_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - therm_throt_process(msr_val & 0x1); + therm_throt_process(msr_val & THERM_STATUS_PROCHOT); } /* Thermal interrupt handler for this CPU setup: */ -- cgit v1.2.2 From 01c6680a547a3ee8dd170c269ea8e037b3191b71 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Apr 2009 12:31:24 +0200 Subject: x86, mce: Cleanup MCG definitions Decode more magic constants and turn them into symbols. [ Sort definitions bitwise, introduce MCG_EXT_CNT - HS ] Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 10 +++++++--- arch/x86/kernel/cpu/mcheck/mce.c | 5 +++-- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index b9972a6bc2a1..94aedaf6327f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -8,9 +8,13 @@ * Machine Check support for x86 */ -#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ -#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ -#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ +#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ +#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ +#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ +#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ +#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ +#define MCG_EXT_CNT_SHIFT 16 +#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 49c74222359d..147333627416 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -569,7 +569,8 @@ static int mce_cap_init(void) u64 cap; rdmsrl(MSR_IA32_MCG_CAP, cap); - b = cap & 0xff; + + b = cap & MCG_BANKCNT_MASK; printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); if (b > MAX_NR_BANKS) { @@ -590,7 +591,7 @@ static int mce_cap_init(void) } /* Use accurate RIP reporting if available. */ - if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) + if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) rip_msr = MSR_IA32_MCG_EIP; return 0; -- cgit v1.2.2 From 3cde5c8c839bf46a7be799ed0e1d0b4780aaf794 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Apr 2009 18:01:31 +0200 Subject: x86, mce: initial steps to make 64bit mce code 32bit clean Replace unsigned long with u64s if they need to contain 64bit values. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 147333627416..cd1313b47506 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -156,15 +156,15 @@ static void print_mce(struct mce *m) "and contact your hardware vendor\n"); } -static void mce_panic(char *msg, struct mce *backup, unsigned long start) +static void mce_panic(char *msg, struct mce *backup, u64 start) { int i; oops_begin(); for (i = 0; i < MCE_LOG_LEN; i++) { - unsigned long tsc = mcelog.entry[i].tsc; + u64 tsc = mcelog.entry[i].tsc; - if (time_before(tsc, start)) + if ((s64)(tsc - start) < 0) continue; print_mce(&mcelog.entry[i]); if (backup && mcelog.entry[i].tsc == backup->tsc) @@ -970,13 +970,13 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); static ssize_t show_ ## name(struct sys_device *s, \ struct sysdev_attribute *attr, \ char *buf) { \ - return sprintf(buf, "%lx\n", (unsigned long)var); \ + return sprintf(buf, "%Lx\n", (u64)var); \ } \ static ssize_t set_ ## name(struct sys_device *s, \ struct sysdev_attribute *attr, \ const char *buf, size_t siz) { \ char *end; \ - unsigned long new = simple_strtoul(buf, &end, 0); \ + u64 new = simple_strtoull(buf, &end, 0); \ \ if (end == buf) \ return -EINVAL; \ -- cgit v1.2.2 From 06b7a7a5ec917761969444fee967c43868a76468 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Apr 2009 18:37:43 +0200 Subject: x86, mce: implement the PPro bank 0 quirk in the 64bit machine check code Quoting the comment: * SDM documents that on family 6 bank 0 should not be written * because it aliases to another special BIOS controlled * register. * But it's not aliased anymore on model 0x1a+ * Don't ignore bank 0 completely because there could be a valid * event later, merely don't write CTL0. This is mostly a port on the 32bit code, except that 32bit always didn't write it and didn't have the 0x1a heuristic. I checked with the CPU designers that the quirk is not required starting with this model. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index cd1313b47506..1dcd3be03328 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -65,6 +65,8 @@ static atomic_t mce_events; static char trigger[128]; static char *trigger_argv[2] = { trigger, NULL }; +static unsigned long dont_init_banks; + static DECLARE_WAIT_QUEUE_HEAD(mce_wait); /* MCA banks polled by the period polling timer for corrected events */ @@ -72,6 +74,11 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL }; +static inline int skip_bank_init(int i) +{ + return i < BITS_PER_LONG && test_bit(i, &dont_init_banks); +} + /* Do initial initialization of a struct mce */ void mce_setup(struct mce *m) { @@ -616,6 +623,8 @@ static void mce_init(void *dummy) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); for (i = 0; i < banks; i++) { + if (skip_bank_init(i)) + continue; wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); } @@ -643,6 +652,19 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) } } + if (c->x86_vendor == X86_VENDOR_INTEL) { + /* + * SDM documents that on family 6 bank 0 should not be written + * because it aliases to another special BIOS controlled + * register. + * But it's not aliased anymore on model 0x1a+ + * Don't ignore bank 0 completely because there could be a + * valid event later, merely don't write CTL0. + */ + + if (c->x86 == 6 && c->x86_model < 0x1A) + __set_bit(0, &dont_init_banks); + } } static void mce_cpu_features(struct cpuinfo_x86 *c) @@ -911,8 +933,10 @@ static int mce_disable(void) { int i; - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); + for (i = 0; i < banks; i++) { + if (!skip_bank_init(i)) + wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); + } return 0; } @@ -1119,8 +1143,10 @@ static void mce_disable_cpu(void *h) return; if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); + for (i = 0; i < banks; i++) { + if (!skip_bank_init(i)) + wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); + } } static void mce_reenable_cpu(void *h) @@ -1133,8 +1159,10 @@ static void mce_reenable_cpu(void *h) if (!(action & CPU_TASKS_FROZEN)) cmci_reenable(); - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); + for (i = 0; i < banks; i++) { + if (!skip_bank_init(i)) + wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); + } } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -- cgit v1.2.2 From 2e6f694fde0a7158590e121962ca2e3c06633528 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Apr 2009 18:42:48 +0200 Subject: x86, mce: port K7 bank 0 quirk to 64bit mce code Various K7 have broken bank 0s. Don't enable it by default Port from the 32bit code. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 1dcd3be03328..1336280edcc2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -650,6 +650,12 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) */ mce_bootlog = 0; } + /* + * Various K7s with broken bank 0 around. Always disable + * by default. + */ + if (c->x86 == 6) + bank[0] = 0; } if (c->x86_vendor == X86_VENDOR_INTEL) { -- cgit v1.2.2 From 5d7279268b654d1f8ac43b0eb6cd9598d9cf55fd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Apr 2009 19:25:48 +0200 Subject: x86, mce: use a call vector to call the 64bit mce handler Allows to call different machine check handlers from the low level machine check entry vector. This is needed for later when it will be used for 32bit too. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 23 ++++++++++++----------- arch/x86/kernel/cpu/mcheck/mce.h | 3 ++- arch/x86/kernel/entry_64.S | 2 +- 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 1336280edcc2..d99318b470d8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -39,6 +39,16 @@ #include "mce.h" +/* Handle unconfigured int18 (should never happen) */ +static void unexpected_machine_check(struct pt_regs *regs, long error_code) +{ + printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", + smp_processor_id()); +} + +/* Call the installed machine check handler for this CPU setup. */ +void (*machine_check_vector)(struct pt_regs *, long error_code) = + unexpected_machine_check; #ifdef CONFIG_X86_64 #define MISC_MCELOG_MINOR 227 @@ -715,6 +725,8 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) } mce_cpu_quirks(c); + machine_check_vector = do_machine_check; + mce_init(NULL); mce_cpu_features(c); mce_init_timer(); @@ -1285,17 +1297,6 @@ int mce_disabled; int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ -/* Handle unconfigured int18 (should never happen) */ -static void unexpected_machine_check(struct pt_regs *regs, long error_code) -{ - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", - smp_processor_id()); -} - -/* Call the installed machine check handler for this CPU setup. */ -void (*machine_check_vector)(struct pt_regs *, long error_code) = - unexpected_machine_check; - /* This has to be run for each processor */ void mcheck_init(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h index cd6cffcc2de0..966ae3c5cb11 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ b/arch/x86/kernel/cpu/mcheck/mce.h @@ -7,11 +7,12 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c); void intel_p6_mcheck_init(struct cpuinfo_x86 *c); void winchip_mcheck_init(struct cpuinfo_x86 *c); -#ifdef CONFIG_X86_32 /* Call the installed machine check handler for this CPU setup. */ extern void (*machine_check_vector)(struct pt_regs *, long error_code); +#ifdef CONFIG_X86_32 + extern int nr_mce_banks; void intel_set_thermal_handler(void); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 38946c6e8433..63276c45bffa 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1382,7 +1382,7 @@ paranoiderrorentry stack_segment do_stack_segment errorentry general_protection do_general_protection errorentry page_fault do_page_fault #ifdef CONFIG_X86_MCE -paranoidzeroentry machine_check do_machine_check +paranoidzeroentry machine_check *machine_check_vector(%rip) #endif /* -- cgit v1.2.2 From 04b2b1a4df6cd0fdaa598f3c623a19c2d93cb48a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 28 Apr 2009 22:50:19 +0200 Subject: x86, mce: rename 64bit mce_dont_init to mce_disabled Give it the same name as on 32bit. This makes further merging easier. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 2 -- arch/x86/kernel/cpu/mcheck/mce.c | 15 +++++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 94aedaf6327f..c3c7ee701753 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -86,9 +86,7 @@ struct mce_log { #ifdef __KERNEL__ -#ifdef CONFIG_X86_32 extern int mce_disabled; -#endif #include diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d99318b470d8..6ab477060f56 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -49,14 +49,15 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) /* Call the installed machine check handler for this CPU setup. */ void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; + +int mce_disabled; + #ifdef CONFIG_X86_64 #define MISC_MCELOG_MINOR 227 atomic_t mce_entry; -static int mce_dont_init; - /* * Tolerant levels: * 0: always panic on uncorrected errors, log corrected errors @@ -194,7 +195,7 @@ static void mce_panic(char *msg, struct mce *backup, u64 start) int mce_available(struct cpuinfo_x86 *c) { - if (mce_dont_init) + if (mce_disabled) return 0; return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } @@ -720,7 +721,7 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) return; if (mce_cap_init() < 0) { - mce_dont_init = 1; + mce_disabled = 1; return; } mce_cpu_quirks(c); @@ -911,7 +912,7 @@ static struct miscdevice mce_log_device = { */ static int __init mcheck_disable(char *str) { - mce_dont_init = 1; + mce_disabled = 1; return 1; } __setup("nomce", mcheck_disable); @@ -925,7 +926,7 @@ __setup("nomce", mcheck_disable); static int __init mcheck_enable(char *str) { if (!strcmp(str, "off")) - mce_dont_init = 1; + mce_disabled = 1; else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) mce_bootlog = (str[0] == 'b'); else if (isdigit(str[0])) @@ -1292,8 +1293,6 @@ device_initcall(mce_init_device); #else /* CONFIG_X86_32: */ -int mce_disabled; - int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ -- cgit v1.2.2 From d7c3c9a609563868d8a70e220399d06a25aba095 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 28 Apr 2009 23:07:25 +0200 Subject: x86, mce: move mce_disabled option into common 32bit/64bit code It's the same function, so let's share it. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 6ab477060f56..5395200dc9d9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -907,16 +907,6 @@ static struct miscdevice mce_log_device = { &mce_chrdev_ops, }; -/* - * Old style boot options parsing. Only for compatibility. - */ -static int __init mcheck_disable(char *str) -{ - mce_disabled = 1; - return 1; -} -__setup("nomce", mcheck_disable); - /* * mce=off disables machine check * mce=TOLERANCELEVEL (number, see above) @@ -1327,19 +1317,22 @@ void mcheck_init(struct cpuinfo_x86 *c) printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); } -static int __init mcheck_disable(char *str) -{ - mce_disabled = 1; - return 1; -} - static int __init mcheck_enable(char *str) { mce_disabled = -1; return 1; } -__setup("nomce", mcheck_disable); __setup("mce", mcheck_enable); -#endif /* CONFIG_X86_32 */ +#endif /* CONFIG_X86_OLD_MCE */ + +/* + * Old style boot options parsing. Only for compatibility. + */ +static int __init mcheck_disable(char *str) +{ + mce_disabled = 1; + return 1; +} +__setup("nomce", mcheck_disable); -- cgit v1.2.2 From 8e97aef5f43ec715f394bc15015ff263b80c3ad6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 28 Apr 2009 14:23:18 +0200 Subject: x86, mce: remove machine check handler idle notify on 64bit i386 has no idle notifiers, but the 64bit machine check code uses them to wake up mcelog from a fatal machine check exception. For corrected machine checks found by the poller or threshold interrupts going through an idle notifier is not needed because the wake_up can is just done directly and doesn't need the idle notifier. It is only needed for logging exceptions. To be honest I never liked the idle notifier even though I signed off on it. On closer investigation the code actually turned out to be nearly. Right now machine check exceptions on x86 are always unrecoverable (lead to panic due to PCC), which means we never execute the idle notifier path. The only exception is the somewhat weird tolerant==3 case, which ignores PCC. I'll fix this in a future patch in a much cleaner way. So remove the "mcelog wakeup through idle notifier" code from 64bit. This allows to compile the 64bit machine check handler on 32bit which doesn't have idle notifiers. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5395200dc9d9..7562c1f674f3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -555,29 +555,6 @@ int mce_notify_user(void) return 0; } -/* see if the idle task needs to notify userspace: */ -static int -mce_idle_callback(struct notifier_block *nfb, unsigned long action, - void *unused) -{ - /* IDLE_END should be safe - interrupts are back on */ - if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) - mce_notify_user(); - - return NOTIFY_OK; -} - -static struct notifier_block mce_idle_notifier = { - .notifier_call = mce_idle_callback, -}; - -static __init int periodic_mcheck_init(void) -{ - idle_notifier_register(&mce_idle_notifier); - return 0; -} -__initcall(periodic_mcheck_init); - /* * Initialize Machine Checks for a CPU. */ -- cgit v1.2.2 From d896a940ef4f12a0a6bc432853b249dcfbacabf0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 28 Apr 2009 14:25:18 +0200 Subject: x86, mce: remove oops_begin() use in 64bit machine check First 32bit doesn't have oops_begin, so it's a barrier of using this code on 32bit. On closer examination it turns out oops_begin is not a good idea in a machine check panic anyways. All oops_begin does it so check for recursive/parallel oopses and implement the "wait on oops" heuristic. But there's actually no good reason to lock machine checks against oopses or prevent them from recursion. Also "wait on oops" does not really make sense for a machine check too. Replace it with a manual bust_spinlocks/console_verbose. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7562c1f674f3..f4d6841d2bdf 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -178,7 +178,8 @@ static void mce_panic(char *msg, struct mce *backup, u64 start) { int i; - oops_begin(); + bust_spinlocks(1); + console_verbose(); for (i = 0; i < MCE_LOG_LEN; i++) { u64 tsc = mcelog.entry[i].tsc; -- cgit v1.2.2 From 4efc0670baf4b14bc95502e54a83ccf639146125 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 28 Apr 2009 19:07:31 +0200 Subject: x86, mce: use 64bit machine check code on 32bit The 64bit machine check code is in many ways much better than the 32bit machine check code: it is more specification compliant, is cleaner, only has a single code base versus one per CPU, has better infrastructure for recovery, has a cleaner way to communicate with user space etc. etc. Use the 64bit code for 32bit too. This is the second attempt to do this. There was one a couple of years ago to unify this code for 32bit and 64bit. Back then this ran into some trouble with K7s and was reverted. I believe this time the K7 problems (and some others) are addressed. I went over the old handlers and was very careful to retain all quirks. But of course this needs a lot of testing on old systems. On newer 64bit capable systems I don't expect much problems because they have been already tested with the 64bit kernel. I made this a CONFIG for now that still allows to select the old machine check code. This is mostly to make testing easier, if someone runs into a problem we can ask them to try with the CONFIG switched. The new code is default y for more coverage. Once there is confidence the 64bit code works well on older hardware too the CONFIG_X86_OLD_MCE and the associated code can be easily removed. This causes a behaviour change for 32bit installations. They now have to install the mcelog package to be able to log corrected machine checks. The 64bit machine check code only handles CPUs which support the standard Intel machine check architecture described in the IA32 SDM. The 32bit code has special support for some older CPUs which have non standard machine check architectures, in particular WinChip C3 and Intel P5. I made those a separate CONFIG option and kept them for now. The WinChip variant could be probably removed without too much pain, it doesn't really do anything interesting. P5 is also disabled by default (like it was before) because many motherboards have it miswired, but according to Alan Cox a few embedded setups use that one. Forward ported/heavily changed version of old patch, original patch included review/fixes from Thomas Gleixner, Bert Wesarg. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 33 +++++++++++++++++++++++++++++++-- arch/x86/include/asm/entry_arch.h | 2 +- arch/x86/kernel/apic/apic.c | 4 ++-- arch/x86/kernel/apic/nmi.c | 2 +- arch/x86/kernel/cpu/mcheck/Makefile | 3 ++- arch/x86/kernel/cpu/mcheck/mce.c | 32 ++++++++++++++++++++++++++++---- arch/x86/kernel/cpu/mcheck/mce.h | 18 +++++++++++++++--- arch/x86/kernel/cpu/mcheck/p5.c | 5 +++++ arch/x86/kernel/irq.c | 4 ++-- arch/x86/kernel/irqinit_32.c | 2 +- arch/x86/kernel/signal.c | 4 ++-- arch/x86/kernel/traps.c | 4 ++-- 12 files changed, 92 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a6efe0a2e9ae..c1c5ccd1937f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -789,6 +789,22 @@ config X86_MCE to disable it. MCE support simply ignores non-MCE processors like the 386 and 486, so nearly everyone can say Y here. +config X86_OLD_MCE + depends on X86_32 && X86_MCE + bool "Use legacy machine check code (will go away)" + default n + select X86_ANCIENT_MCE + ---help--- + Use the old i386 machine check code. This is merely intended for + testing in a transition period. Try this if you run into any machine + check related software problems, but report the problem to + linux-kernel. When in doubt say no. + +config X86_NEW_MCE + depends on X86_MCE + bool + default y if (!X86_OLD_MCE && X86_32) || X86_64 + config X86_MCE_INTEL def_bool y prompt "Intel MCE features" @@ -805,6 +821,15 @@ config X86_MCE_AMD Additional support for AMD specific MCE features such as the DRAM Error Threshold. +config X86_ANCIENT_MCE + def_bool n + depends on X86_32 + prompt "Support for old Pentium 5 / WinChip machine checks" + ---help--- + Include support for machine check handling on old Pentium 5 or WinChip + systems. These typically need to be enabled explicitely on the command + line. + config X86_MCE_THRESHOLD depends on X86_MCE_AMD || X86_MCE_INTEL bool @@ -812,7 +837,7 @@ config X86_MCE_THRESHOLD config X86_MCE_NONFATAL tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" - depends on X86_32 && X86_MCE + depends on X86_OLD_MCE ---help--- Enabling this feature starts a timer that triggers every 5 seconds which will look at the machine check registers to see if anything happened. @@ -825,11 +850,15 @@ config X86_MCE_NONFATAL config X86_MCE_P4THERMAL bool "check for P4 thermal throttling interrupt." - depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) + depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP) ---help--- Enabling this feature will cause a message to be printed when the P4 enters thermal throttling. +config X86_THERMAL_VECTOR + def_bool y + depends on X86_MCE_P4THERMAL || X86_MCE_INTEL + config VM86 bool "Enable VM86 support" if EMBEDDED default y diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index c2e6bedaf258..486c9e946f5c 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -52,7 +52,7 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) #endif -#ifdef CONFIG_X86_MCE_P4THERMAL +#ifdef CONFIG_X86_THERMAL_VECTOR BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) #endif diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f2870920f246..ad532289ef2e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -843,7 +843,7 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) +#ifdef CONFIG_X86_THERMAL_VECTOR if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); @@ -1962,7 +1962,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) +#ifdef CONFIG_X86_THERMAL_VECTOR if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index ce4fbfa315a1..c4762276c17e 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu) static inline int mce_in_progress(void) { -#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) +#if defined(CONFIG_X86_NEW_MCE) return atomic_read(&mce_entry) > 0; #endif return 0; diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 55f01b39a105..5f8b09425d39 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,6 +1,7 @@ obj-y = mce.o therm_throt.o -obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o +obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o +obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index f4d6841d2bdf..e193de44ef19 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -52,7 +52,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = int mce_disabled; -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_NEW_MCE #define MISC_MCELOG_MINOR 227 @@ -662,6 +662,21 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) } } +static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) +{ + if (c->x86 != 5) + return; + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + if (mce_p5_enabled()) + intel_p5_mcheck_init(c); + break; + case X86_VENDOR_CENTAUR: + winchip_mcheck_init(c); + break; + } +} + static void mce_cpu_features(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { @@ -695,6 +710,11 @@ static void mce_init_timer(void) */ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) { + if (mce_disabled) + return; + + mce_ancient_init(c); + if (!mce_available(c)) return; @@ -893,6 +913,10 @@ static struct miscdevice mce_log_device = { */ static int __init mcheck_enable(char *str) { + if (*str == 0) + enable_p5_mce(); + if (*str == '=') + str++; if (!strcmp(str, "off")) mce_disabled = 1; else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) @@ -900,13 +924,13 @@ static int __init mcheck_enable(char *str) else if (isdigit(str[0])) get_option(&str, &tolerant); else { - printk(KERN_INFO "mce= argument %s ignored. Please use /sys\n", + printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", str); return 0; } return 1; } -__setup("mce=", mcheck_enable); +__setup("mce", mcheck_enable); /* * Sysfs support @@ -1259,7 +1283,7 @@ static __init int mce_init_device(void) device_initcall(mce_init_device); -#else /* CONFIG_X86_32: */ +#else /* CONFIG_X86_OLD_MCE: */ int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h index 966ae3c5cb11..84a552b458c8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ b/arch/x86/kernel/cpu/mcheck/mce.h @@ -1,17 +1,29 @@ #include #include +#ifdef CONFIG_X86_OLD_MCE void amd_mcheck_init(struct cpuinfo_x86 *c); void intel_p4_mcheck_init(struct cpuinfo_x86 *c); -void intel_p5_mcheck_init(struct cpuinfo_x86 *c); void intel_p6_mcheck_init(struct cpuinfo_x86 *c); -void winchip_mcheck_init(struct cpuinfo_x86 *c); +#endif +#ifdef CONFIG_X86_ANCIENT_MCE +void intel_p5_mcheck_init(struct cpuinfo_x86 *c); +void winchip_mcheck_init(struct cpuinfo_x86 *c); +extern int mce_p5_enable; +static inline int mce_p5_enabled(void) { return mce_p5_enable; } +static inline void enable_p5_mce(void) { mce_p5_enable = 1; } +#else +static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} +static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} +static inline int mce_p5_enabled(void) { return 0; } +static inline void enable_p5_mce(void) { } +#endif /* Call the installed machine check handler for this CPU setup. */ extern void (*machine_check_vector)(struct pt_regs *, long error_code); -#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_OLD_MCE extern int nr_mce_banks; diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 8812f5441830..015f481ab1b0 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -14,6 +14,9 @@ #include "mce.h" +/* By default disabled */ +int mce_p5_enable; + /* Machine check handler for Pentium class Intel CPUs: */ static void pentium_machine_check(struct pt_regs *regs, long error_code) { @@ -44,9 +47,11 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_MCE)) return; +#ifdef CONFIG_X86_OLD_MCE /* Default P5 to off as its often misconnected: */ if (mce_disabled != -1) return; +#endif machine_check_vector = pentium_machine_check; /* Make sure the vector pointer is visible before we enable MCEs: */ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c3fe010d74c8..35eddc9ec99e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -89,7 +89,7 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); -# ifdef CONFIG_X86_64 +# ifdef CONFIG_X86_MCE_THRESHOLD seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); @@ -176,7 +176,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #endif #ifdef CONFIG_X86_MCE sum += irq_stats(cpu)->irq_thermal_count; -# ifdef CONFIG_X86_64 +# ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; #endif #endif diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 368b0a8836f9..98846e03211e 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -181,7 +181,7 @@ void __init native_init_IRQ(void) alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) +#ifdef CONFIG_X86_THERMAL_VECTOR /* thermal monitor LVT interrupt */ alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 14425166b8e3..d0851e3f77eb 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -25,11 +25,11 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include #include -#include #endif /* CONFIG_X86_64 */ #include @@ -857,7 +857,7 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { -#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) +#ifdef CONFIG_X86_NEW_MCE /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) mce_notify_user(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a1d288327ff0..ad771f15bddd 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -798,7 +798,8 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) return new_kesp; } -#else +#endif + asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) { } @@ -806,7 +807,6 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) { } -#endif /* * 'math_state_restore()' saves the current math information in the -- cgit v1.2.2 From 7856f6cce4a8cda8c1f94b99605c07d16b8d8dec Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 28 Apr 2009 23:32:56 +0200 Subject: x86, mce: enable MCE_INTEL for 32bit new MCE Enable the 64bit MCE_INTEL code (CMCI, thermal interrupts) for 32bit NEW_MCE. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/entry_arch.h | 4 ++++ arch/x86/include/asm/hardirq.h | 2 +- arch/x86/include/asm/irq_vectors.h | 5 +++-- arch/x86/kernel/cpu/mcheck/threshold.c | 2 +- arch/x86/kernel/entry_64.S | 2 +- arch/x86/kernel/irqinit_32.c | 4 ++++ arch/x86/kernel/traps.c | 2 +- 8 files changed, 16 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1c5ccd1937f..e1c9f77f69ec 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -808,7 +808,7 @@ config X86_NEW_MCE config X86_MCE_INTEL def_bool y prompt "Intel MCE features" - depends on X86_64 && X86_MCE && X86_LOCAL_APIC + depends on X86_NEW_MCE && X86_LOCAL_APIC ---help--- Additional support for intel specific MCE features such as the thermal monitor. diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 486c9e946f5c..b2eb9c066843 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -56,4 +56,8 @@ BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) #endif +#ifdef CONFIG_X86_MCE_THRESHOLD +BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) +#endif + #endif diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 37555e52f980..922ee7c29693 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -20,7 +20,7 @@ typedef struct { #endif #ifdef CONFIG_X86_MCE unsigned int irq_thermal_count; -# ifdef CONFIG_X86_64 +# ifdef CONFIG_X86_MCE_THRESHOLD unsigned int irq_threshold_count; # endif #endif diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 3cbd79bbb47c..451e24d18050 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -87,10 +87,11 @@ #define CALL_FUNCTION_SINGLE_VECTOR 0xfb #define THERMAL_APIC_VECTOR 0xfa +#define THRESHOLD_APIC_VECTOR 0xf9 + #ifdef CONFIG_X86_32 -/* 0xf8 - 0xf9 : free */ +/* 0xf9 : free */ #else -# define THRESHOLD_APIC_VECTOR 0xf9 # define UV_BAU_MESSAGE 0xf8 #endif diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 23ee9e730f78..d746df2909c9 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -17,7 +17,7 @@ static void default_threshold_interrupt(void) void (*mce_threshold_vector)(void) = default_threshold_interrupt; -asmlinkage void mce_threshold_interrupt(void) +asmlinkage void smp_threshold_interrupt(void) { exit_idle(); irq_enter(); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 63276c45bffa..a31a7f29cffe 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1007,7 +1007,7 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ #endif apicinterrupt THRESHOLD_APIC_VECTOR \ - threshold_interrupt mce_threshold_interrupt + threshold_interrupt smp_threshold_interrupt apicinterrupt THERMAL_APIC_VECTOR \ thermal_interrupt smp_thermal_interrupt diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 98846e03211e..2512ad93dabf 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -186,6 +186,10 @@ void __init native_init_IRQ(void) alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif +#ifdef CONFIG_X86_MCE_THRESHOLD + alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); +#endif + if (!acpi_ioapic) setup_irq(2, &irq2); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ad771f15bddd..0d358c884b35 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -804,7 +804,7 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) { } -asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) +asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) { } -- cgit v1.2.2 From de5619dfef76ddb403eb7c6de39c0130166c5dc3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 28 Apr 2009 23:34:40 +0200 Subject: x86, mce: enable MCE_AMD for 32bit NEW_MCE That's very easy using the infrastructure enabled earlier for MCE_INTEL Untested. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e1c9f77f69ec..a148e7ac0d83 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -816,7 +816,7 @@ config X86_MCE_INTEL config X86_MCE_AMD def_bool y prompt "AMD MCE features" - depends on X86_64 && X86_MCE && X86_LOCAL_APIC + depends on X86_NEW_MCE && X86_LOCAL_APIC ---help--- Additional support for AMD specific MCE features such as the DRAM Error Threshold. -- cgit v1.2.2 From 5f8c1a54cab6f449fe04d42d0661bc796fa4e73e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 29 Apr 2009 19:29:12 +0200 Subject: x86, mce: add MSR read wrappers for easier error injection This will be used by future patches to allow machine check error injection. Right now it's a nop, except for adding some wrappers around the MSR reads. This is early in the sequence to avoid too many conflicts. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e193de44ef19..e755c95674dc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -194,6 +194,19 @@ static void mce_panic(char *msg, struct mce *backup, u64 start) panic(msg); } +/* MSR access wrappers used for error injection */ +static u64 mce_rdmsrl(u32 msr) +{ + u64 v; + rdmsrl(msr, v); + return v; +} + +static void mce_wrmsrl(u32 msr, u64 v) +{ + wrmsrl(msr, v); +} + int mce_available(struct cpuinfo_x86 *c) { if (mce_disabled) @@ -213,7 +226,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) if (rip_msr) { /* Assume the RIP in the MSR is exact. Is this true? */ m->mcgstatus |= MCG_STATUS_EIPV; - rdmsrl(rip_msr, m->ip); + m->ip = mce_rdmsrl(rip_msr); m->cs = 0; } } @@ -231,7 +244,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_setup(&m); - rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); for (i = 0; i < banks; i++) { if (!bank[i] || !test_bit(i, *b)) continue; @@ -242,7 +255,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) m.tsc = 0; barrier(); - rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); + m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); if (!(m.status & MCI_STATUS_VAL)) continue; @@ -257,9 +270,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) continue; if (m.status & MCI_STATUS_MISCV) - rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); + m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); if (m.status & MCI_STATUS_ADDRV) - rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); + m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); if (!(flags & MCP_TIMESTAMP)) m.tsc = 0; @@ -275,7 +288,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) /* * Clear state for this bank. */ - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); } /* @@ -320,7 +333,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_setup(&m); - rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); /* if the restart IP is not valid, we're done for */ if (!(m.mcgstatus & MCG_STATUS_RIPV)) @@ -338,7 +351,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) m.addr = 0; m.bank = i; - rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); + m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); if ((m.status & MCI_STATUS_VAL) == 0) continue; @@ -378,9 +391,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) } if (m.status & MCI_STATUS_MISCV) - rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); + m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); if (m.status & MCI_STATUS_ADDRV) - rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); + m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); mce_get_rip(&m, regs); mce_log(&m); @@ -449,9 +462,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* the last thing we do is clear state */ for (i = 0; i < banks; i++) { if (test_bit(i, toclear)) - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); } - wrmsrl(MSR_IA32_MCG_STATUS, 0); + mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); out2: atomic_dec(&mce_entry); } -- cgit v1.2.2 From ea149b36c7f511d17dd89fee734cb09778a91fa0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 29 Apr 2009 19:31:00 +0200 Subject: x86, mce: add basic error injection infrastructure Allow user programs to write mce records into /dev/mcelog. When they do that a fake machine check is triggered to test the machine check code. This uses the MCE MSR wrappers added earlier. The implementation is straight forward. There is a struct mce record per CPU and the MCE MSR accesses get data from there if there is valid data injected there. This allows to test the machine check code relatively realistically because only the lowest layer of hardware access is intercepted. The test suite and injector are available at git://git.kernel.org/pub/scm/utils/cpu/mce/mce-test.git git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 8 ++ arch/x86/include/asm/mce.h | 3 + arch/x86/kernel/cpu/mcheck/Makefile | 1 + arch/x86/kernel/cpu/mcheck/mce-inject.c | 126 ++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/mcheck/mce.c | 39 +++++++++- 5 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/mcheck/mce-inject.c (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a148e7ac0d83..e25b6358fbe3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -835,6 +835,14 @@ config X86_MCE_THRESHOLD bool default y +config X86_MCE_INJECT + depends on X86_NEW_MCE + tristate "Machine check injector support" + ---help--- + Provide support for injecting machine checks for testing purposes. + If you don't know what a machine check is and you don't do kernel + QA it is safe to say n. + config X86_MCE_NONFATAL tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" depends on X86_OLD_MCE diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index c3c7ee701753..e7d2372301ef 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -141,6 +141,9 @@ extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); extern int mce_notify_user(void); +DECLARE_PER_CPU(struct mce, injectm); +extern struct file_operations mce_chrdev_ops; + #ifdef CONFIG_X86_MCE extern void mcheck_init(struct cpuinfo_x86 *c); #else diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 5f8b09425d39..60ee182c6c52 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o +obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c new file mode 100644 index 000000000000..58afac4b5df5 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -0,0 +1,126 @@ +/* + * Machine check injection support. + * Copyright 2008 Intel Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + * Authors: + * Andi Kleen + * Ying Huang + */ +#include +#include +#include +#include +#include +#include +#include +#include + +/* Update fake mce registers on current CPU. */ +static void inject_mce(struct mce *m) +{ + struct mce *i = &per_cpu(injectm, m->cpu); + + /* Make sure noone reads partially written injectm */ + i->finished = 0; + mb(); + m->finished = 0; + /* First set the fields after finished */ + i->cpu = m->cpu; + mb(); + /* Now write record in order, finished last (except above) */ + memcpy(i, m, sizeof(struct mce)); + /* Finally activate it */ + mb(); + i->finished = 1; +} + +struct delayed_mce { + struct timer_list timer; + struct mce m; +}; + +/* Inject mce on current CPU */ +static void raise_mce(unsigned long data) +{ + struct delayed_mce *dm = (struct delayed_mce *)data; + struct mce *m = &dm->m; + int cpu = m->cpu; + + inject_mce(m); + if (m->status & MCI_STATUS_UC) { + struct pt_regs regs; + memset(®s, 0, sizeof(struct pt_regs)); + regs.ip = m->ip; + regs.cs = m->cs; + printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); + do_machine_check(®s, 0); + printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); + } else { + mce_banks_t b; + memset(&b, 0xff, sizeof(mce_banks_t)); + printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); + machine_check_poll(0, &b); + mce_notify_user(); + printk(KERN_INFO "Finished machine check poll on CPU %d\n", + cpu); + } + kfree(dm); +} + +/* Error injection interface */ +static ssize_t mce_write(struct file *filp, const char __user *ubuf, + size_t usize, loff_t *off) +{ + struct delayed_mce *dm; + struct mce m; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + /* + * There are some cases where real MSR reads could slip + * through. + */ + if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA)) + return -EIO; + + if ((unsigned long)usize > sizeof(struct mce)) + usize = sizeof(struct mce); + if (copy_from_user(&m, ubuf, usize)) + return -EFAULT; + + if (m.cpu >= NR_CPUS || !cpu_online(m.cpu)) + return -EINVAL; + + dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL); + if (!dm) + return -ENOMEM; + + /* + * Need to give user space some time to set everything up, + * so do it a jiffie or two later everywhere. + * Should we use a hrtimer here for better synchronization? + */ + memcpy(&dm->m, &m, sizeof(struct mce)); + setup_timer(&dm->timer, raise_mce, (unsigned long)dm); + dm->timer.expires = jiffies + 2; + add_timer_on(&dm->timer, m.cpu); + return usize; +} + +static int inject_init(void) +{ + printk(KERN_INFO "Machine check injector initialized\n"); + mce_chrdev_ops.write = mce_write; + return 0; +} + +module_init(inject_init); +/* Cannot tolerate unloading currently because we cannot + * guarantee all openers of mce_chrdev will get a reference to us. + */ +MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e755c95674dc..fe216bd10f43 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -98,6 +98,9 @@ void mce_setup(struct mce *m) rdtscll(m->tsc); } +DEFINE_PER_CPU(struct mce, injectm); +EXPORT_PER_CPU_SYMBOL_GPL(injectm); + /* * Lockless MCE logging infrastructure. * This avoids deadlocks on printk locks without having to break locks. Also @@ -194,16 +197,46 @@ static void mce_panic(char *msg, struct mce *backup, u64 start) panic(msg); } +/* Support code for software error injection */ + +static int msr_to_offset(u32 msr) +{ + unsigned bank = __get_cpu_var(injectm.bank); + if (msr == rip_msr) + return offsetof(struct mce, ip); + if (msr == MSR_IA32_MC0_STATUS + bank*4) + return offsetof(struct mce, status); + if (msr == MSR_IA32_MC0_ADDR + bank*4) + return offsetof(struct mce, addr); + if (msr == MSR_IA32_MC0_MISC + bank*4) + return offsetof(struct mce, misc); + if (msr == MSR_IA32_MCG_STATUS) + return offsetof(struct mce, mcgstatus); + return -1; +} + /* MSR access wrappers used for error injection */ static u64 mce_rdmsrl(u32 msr) { u64 v; + if (__get_cpu_var(injectm).finished) { + int offset = msr_to_offset(msr); + if (offset < 0) + return 0; + return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); + } rdmsrl(msr, v); return v; } static void mce_wrmsrl(u32 msr, u64 v) { + if (__get_cpu_var(injectm).finished) { + int offset = msr_to_offset(msr); + if (offset >= 0) + *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; + return; + } wrmsrl(msr, v); } @@ -296,6 +329,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) * exceptions. */ } +EXPORT_SYMBOL_GPL(machine_check_poll); /* * The actual machine check handler. This only handles real @@ -468,6 +502,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) out2: atomic_dec(&mce_entry); } +EXPORT_SYMBOL_GPL(do_machine_check); #ifdef CONFIG_X86_MCE_INTEL /*** @@ -568,6 +603,7 @@ int mce_notify_user(void) } return 0; } +EXPORT_SYMBOL_GPL(mce_notify_user); /* * Initialize Machine Checks for a CPU. @@ -904,13 +940,14 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) } } -static const struct file_operations mce_chrdev_ops = { +struct file_operations mce_chrdev_ops = { .open = mce_open, .release = mce_release, .read = mce_read, .poll = mce_poll, .unlocked_ioctl = mce_ioctl, }; +EXPORT_SYMBOL_GPL(mce_chrdev_ops); static struct miscdevice mce_log_device = { MISC_MCELOG_MINOR, -- cgit v1.2.2 From a1ff41bfc1bb7a6d19cf958f89a9b539678781e5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 25 May 2009 22:16:14 -0700 Subject: x86, mce: add comment about mce_chrdev_ops being writable Add a comment explaining that mce_chrdev_ops is intentionally writable. [ Impact: comment only ] Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index fe216bd10f43..156cdf6d9181 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -940,6 +940,7 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) } } +/* Modified in mce-inject.c, so not static or const */ struct file_operations mce_chrdev_ops = { .open = mce_open, .release = mce_release, -- cgit v1.2.2 From 5706001aacba5d3db5f224ca135e5e91a30be39c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 25 May 2009 22:18:17 -0700 Subject: x86, mce: fix comment style in mce-inject.c Fix style of winged comment in mce-inject.c. [ Impact: comment only ] Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce-inject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 58afac4b5df5..673c72855022 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -120,7 +120,8 @@ static int inject_init(void) } module_init(inject_init); -/* Cannot tolerate unloading currently because we cannot +/* + * Cannot tolerate unloading currently because we cannot * guarantee all openers of mce_chrdev will get a reference to us. */ MODULE_LICENSE("GPL"); -- cgit v1.2.2 From 88921be30296e126896ee4d30758f989d1c4ddfb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:51 +0200 Subject: x86, mce: synchronize core after machine check handling The example code in the IA32 SDM recommends to synchronize the CPU after machine check handling. So do that here. [ Impact: Spec compliance ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 156cdf6d9181..495c96808668 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -328,6 +328,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) * Don't clear MCG_STATUS here because it's only defined for * exceptions. */ + + sync_core(); } EXPORT_SYMBOL_GPL(machine_check_poll); @@ -501,6 +503,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); out2: atomic_dec(&mce_entry); + sync_core(); } EXPORT_SYMBOL_GPL(do_machine_check); -- cgit v1.2.2 From b56f642d2bf8c1f7c6499c1e55b23311a33cc796 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:52 +0200 Subject: x86, mce: use extended sysattrs for the check_interval attribute. Instead of using own callbacks use the generic ones provided by the sysdev later. This finally allows to get rid of the ugly ACCESSOR macros. Should also save some text size. [ Impact: cleanup ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 495c96808668..3cac9da7ce24 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1054,28 +1054,6 @@ DEFINE_PER_CPU(struct sys_device, mce_dev); __cpuinitdata void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); -/* Why are there no generic functions for this? */ -#define ACCESSOR(name, var, start) \ - static ssize_t show_ ## name(struct sys_device *s, \ - struct sysdev_attribute *attr, \ - char *buf) { \ - return sprintf(buf, "%Lx\n", (u64)var); \ - } \ - static ssize_t set_ ## name(struct sys_device *s, \ - struct sysdev_attribute *attr, \ - const char *buf, size_t siz) { \ - char *end; \ - u64 new = simple_strtoull(buf, &end, 0); \ - \ - if (end == buf) \ - return -EINVAL; \ - var = new; \ - start; \ - \ - return end-buf; \ - } \ - static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); - static struct sysdev_attribute *bank_attrs; static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, @@ -1126,13 +1104,26 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, return len; } +static ssize_t store_int_with_restart(struct sys_device *s, + struct sysdev_attribute *attr, + const char *buf, size_t size) +{ + ssize_t ret = sysdev_store_int(s, attr, buf, size); + mce_restart(); + return ret; +} + static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); -ACCESSOR(check_interval, check_interval, mce_restart()) +static struct sysdev_ext_attribute attr_check_interval = { + _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, + store_int_with_restart), + &check_interval +}; static struct sysdev_attribute *mce_attrs[] = { - &attr_tolerant.attr, &attr_check_interval, &attr_trigger, + &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, NULL }; -- cgit v1.2.2 From fc016a49c2d92f2efbe22c1fb66eb7a5d2a06ed1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:53 +0200 Subject: x86, mce: remove unused mce_events variable Remove unused mce_events static variable. [ Impact: cleanup ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3cac9da7ce24..69aad7e96a53 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -71,7 +71,6 @@ static u64 *bank; static unsigned long notify_user; static int rip_msr; static int mce_bootlog = -1; -static atomic_t mce_events; static char trigger[128]; static char *trigger_argv[2] = { trigger, NULL }; @@ -116,7 +115,6 @@ void mce_log(struct mce *mce) { unsigned next, entry; - atomic_inc(&mce_events); mce->finished = 0; wmb(); for (;;) { -- cgit v1.2.2 From 8be9110569aec1f65d86b08aef7ec49659137bf9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 May 2009 21:56:53 +0200 Subject: x86, mce: remove mce_init unused argument Remove unused mce_init argument. [ Impact: cleanup ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 69aad7e96a53..20c7e7c669d2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -643,7 +643,7 @@ static int mce_cap_init(void) return 0; } -static void mce_init(void *dummy) +static void mce_init(void) { mce_banks_t all_banks; u64 cap; @@ -776,7 +776,7 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) machine_check_vector = do_machine_check; - mce_init(NULL); + mce_init(); mce_cpu_features(c); mce_init_timer(); } @@ -1020,7 +1020,7 @@ static int mce_shutdown(struct sys_device *dev) */ static int mce_resume(struct sys_device *dev) { - mce_init(NULL); + mce_init(); mce_cpu_features(¤t_cpu_data); return 0; @@ -1030,7 +1030,7 @@ static void mce_cpu_restart(void *data) { del_timer_sync(&__get_cpu_var(mce_timer)); if (mce_available(¤t_cpu_data)) - mce_init(NULL); + mce_init(); mce_init_timer(); } -- cgit v1.2.2 From 32561696c23028596f24b353d98f2e23b58f91f7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:53 +0200 Subject: x86, mce: rename and align out2 label There's only a single out path in do_machine_check now, so rename the label from out2 to out. Also align it at the first column. [ Impact: minor cleanup, no functional changes ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 20c7e7c669d2..18d505d80221 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -361,9 +361,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL) == NOTIFY_STOP) - goto out2; + goto out; if (!banks) - goto out2; + goto out; mce_setup(&m); @@ -499,7 +499,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); } mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); - out2: +out: atomic_dec(&mce_entry); sync_core(); } -- cgit v1.2.2 From b170204ddb7844ffff62d2d537b20c0eeb97725e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:54 +0200 Subject: x86, mce: drop BKL in mce_open BKL is not needed for anything in mce_open because it has an own spinlock. Remove it. [ Impact: cleanup ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 18d505d80221..8ab28368bb92 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -791,12 +790,10 @@ static int open_exclu; /* already open exclusive? */ static int mce_open(struct inode *inode, struct file *file) { - lock_kernel(); spin_lock(&mce_state_lock); if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { spin_unlock(&mce_state_lock); - unlock_kernel(); return -EBUSY; } @@ -806,7 +803,6 @@ static int mce_open(struct inode *inode, struct file *file) open_count++; spin_unlock(&mce_state_lock); - unlock_kernel(); return nonseekable_open(inode, file); } -- cgit v1.2.2 From 9319cec8c185e84fc5281afb6ac5d4c47a234841 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 14 Apr 2009 17:26:30 +0900 Subject: x86, mce: use strict_strtoull Use strict_strtoull instead of simple_strtoull. Signed-off-by: Hidetoshi Seto Cc: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 9 ++++----- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 16 ++++++---------- 2 files changed, 10 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8ab28368bb92..4375ffb5459f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1059,18 +1059,17 @@ static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, } static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, - const char *buf, size_t siz) + const char *buf, size_t size) { - char *end; - u64 new = simple_strtoull(buf, &end, 0); + u64 new; - if (end == buf) + if (strict_strtoull(buf, 0, &new) < 0) return -EINVAL; bank[attr - bank_attrs] = new; mce_restart(); - return end-buf; + return size; } static ssize_t diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 083f270251fa..0c563432e25c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -269,14 +269,12 @@ SHOW_FIELDS(interrupt_enable) SHOW_FIELDS(threshold_limit) static ssize_t -store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count) +store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) { struct thresh_restart tr; unsigned long new; - char *end; - new = simple_strtoul(buf, &end, 0); - if (end == buf) + if (strict_strtoul(buf, 0, &new) < 0) return -EINVAL; b->interrupt_enable = !!new; @@ -287,18 +285,16 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count) smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); - return end - buf; + return size; } static ssize_t -store_threshold_limit(struct threshold_block *b, const char *buf, size_t count) +store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) { struct thresh_restart tr; unsigned long new; - char *end; - new = simple_strtoul(buf, &end, 0); - if (end == buf) + if (strict_strtoul(buf, 0, &new) < 0) return -EINVAL; if (new > THRESHOLD_MAX) @@ -313,7 +309,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t count) smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); - return end - buf; + return size; } struct threshold_block_cross_cpu { -- cgit v1.2.2 From cc3aec52ab8e013984270a79d1aa51f691d239b0 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 30 Apr 2009 15:58:22 +0900 Subject: x86, mce: trivial clean up for therm_throt.c This patch removes following checkpatch warning: WARNING: Use #include instead of +#include Signed-off-by: Hidetoshi Seto Cc: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index a2b5d7ddb19f..7b1ae2e20ba5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -20,7 +20,6 @@ #include #include -#include /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) -- cgit v1.2.2 From 14a02530e2239f753a0f3f089847e723adbdaa47 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 30 Apr 2009 16:04:51 +0900 Subject: x86, mce: trivial clean up for mce.c This fixs following checkpatch warnings: WARNING: Use #include instead of +#include WARNING: Use #include instead of +#include WARNING: line over 80 characters + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); WARNING: braces {} are not necessary for any arm of this statement + if (mce_notify_user()) { [...] + } else { [...] Signed-off-by: Hidetoshi Seto Cc: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4375ffb5459f..1d0aa9c4e15b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -27,14 +28,13 @@ #include #include #include +#include #include #include -#include #include #include #include -#include #include "mce.h" @@ -125,7 +125,8 @@ void mce_log(struct mce *mce) * interesting ones: */ if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); + set_bit(MCE_OVERFLOW, + (unsigned long *)&mcelog.flags); return; } /* Old left over entry. Skip: */ @@ -556,11 +557,10 @@ static void mcheck_timer(unsigned long data) * polling interval, otherwise increase the polling interval. */ n = &__get_cpu_var(next_interval); - if (mce_notify_user()) { + if (mce_notify_user()) *n = max(*n/2, HZ/100); - } else { + else *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); - } t->expires = jiffies + *n; add_timer(t); -- cgit v1.2.2 From 34fa1967aa0827776e37feb5666df0327575a0f2 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 8 Apr 2009 12:31:18 +0200 Subject: x86, mce: trivial clean up for mce_amd_64.c Fix for followings: WARNING: Use #include instead of +#include ERROR: Macros with multiple statements should be enclosed in a do - while loop +#define THRESHOLD_ATTR(_name, _mode, _show, _store) \ +{ \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; WARNING: usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc + if (cpu >= NR_CPUS) Signed-off-by: Hidetoshi Seto Cc: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 0c563432e25c..ddae21620bda 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -24,7 +25,6 @@ #include #include -#include #include #include #include @@ -344,17 +344,13 @@ static ssize_t store_error_count(struct threshold_block *b, return 1; } -#define THRESHOLD_ATTR(_name, _mode, _show, _store) \ -{ \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ +#define RW_ATTR(val) \ +static struct threshold_attr val = { \ + .attr = {.name = __stringify(val), .mode = 0644 }, \ + .show = show_## val, \ + .store = store_## val, \ }; -#define RW_ATTR(name) \ -static struct threshold_attr name = \ - THRESHOLD_ATTR(name, 0644, show_## name, store_## name) - RW_ATTR(interrupt_enable); RW_ATTR(threshold_limit); RW_ATTR(error_count); @@ -675,9 +671,6 @@ static void threshold_remove_device(unsigned int cpu) static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu) { - if (cpu >= NR_CPUS) - return; - switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: -- cgit v1.2.2 From 61a021a0700c22ee527d73d92f9acb109ff478f8 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 14 Apr 2009 17:09:04 +0900 Subject: x86, mce: trivial clean up for mce_intel_64.c Fix for: WARNING: space prohibited between function name and open parenthesis '(' + for_each_online_cpu (cpu) { Signed-off-by: Hidetoshi Seto Cc: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 13abafcb72e4..eff3740501a3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -196,7 +196,7 @@ void cmci_rediscover(int dying) return; cpumask_copy(old, ¤t->cpus_allowed); - for_each_online_cpu (cpu) { + for_each_online_cpu(cpu) { if (cpu == dying) continue; if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) -- cgit v1.2.2 From 98a9c8c3ba13dfc3df8e6d2a126d2fa4e4621e9c Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 28 May 2009 11:41:01 +0900 Subject: x86, mce: trivial clean up for mce-inject.c Fix for: WARNING: Use #include instead of +#include WARNING: usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc + if (m.cpu >= NR_CPUS || !cpu_online(m.cpu)) ERROR: trailing whitespace +/* $ Signed-off-by: Hidetoshi Seto Cc: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce-inject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 673c72855022..7b3a5428396a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -11,13 +11,13 @@ * Andi Kleen * Ying Huang */ +#include #include #include #include #include #include #include -#include #include /* Update fake mce registers on current CPU. */ @@ -93,7 +93,7 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, if (copy_from_user(&m, ubuf, usize)) return -EFAULT; - if (m.cpu >= NR_CPUS || !cpu_online(m.cpu)) + if (m.cpu >= num_possible_cpus() || !cpu_online(m.cpu)) return -EINVAL; dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL); -- cgit v1.2.2 From eb2a6ab729ac40a553797703a5a5dba3a74de004 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 28 Apr 2009 23:32:56 +0200 Subject: x86: trivial clean up for irq_vectors.h Fix a wrong comment. Signed-off-by: Hidetoshi Seto Cc: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/irq_vectors.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 451e24d18050..233006c4e365 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -86,11 +86,10 @@ #define CALL_FUNCTION_VECTOR 0xfc #define CALL_FUNCTION_SINGLE_VECTOR 0xfb #define THERMAL_APIC_VECTOR 0xfa - #define THRESHOLD_APIC_VECTOR 0xf9 #ifdef CONFIG_X86_32 -/* 0xf9 : free */ +/* 0xf8 : free */ #else # define UV_BAU_MESSAGE 0xf8 #endif -- cgit v1.2.2 From cd13adcc823aa421efa4efd995fa7004a58cf38d Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 27 May 2009 16:57:31 +0900 Subject: x86: trivial clean up for arch/x86/Kconfig Use tab. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e25b6358fbe3..8c0fff0860bd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -822,13 +822,13 @@ config X86_MCE_AMD the DRAM Error Threshold. config X86_ANCIENT_MCE - def_bool n - depends on X86_32 - prompt "Support for old Pentium 5 / WinChip machine checks" - ---help--- - Include support for machine check handling on old Pentium 5 or WinChip - systems. These typically need to be enabled explicitely on the command - line. + def_bool n + depends on X86_32 + prompt "Support for old Pentium 5 / WinChip machine checks" + ---help--- + Include support for machine check handling on old Pentium 5 or WinChip + systems. These typically need to be enabled explicitely on the command + line. config X86_MCE_THRESHOLD depends on X86_MCE_AMD || X86_MCE_INTEL -- cgit v1.2.2 From 38736072d45488fd45f076388b6570419bbbc682 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 28 May 2009 10:05:33 -0700 Subject: x86, mce: drop "extern" from function prototypes in asm/mce.h Function prototypes don't need to be prefixed by "extern". [ Impact: cleanup ] Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index e7d2372301ef..ac6e0303bf21 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -121,13 +121,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c); static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif -extern int mce_available(struct cpuinfo_x86 *c); +int mce_available(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); extern atomic_t mce_entry; -extern void do_machine_check(struct pt_regs *, long); +void do_machine_check(struct pt_regs *, long); typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); @@ -137,15 +137,15 @@ enum mcp_flags { MCP_UC = (1 << 1), /* log uncorrected errors */ MCP_DONTLOG = (1 << 2), /* only clear, don't log */ }; -extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); +void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); -extern int mce_notify_user(void); +int mce_notify_user(void); DECLARE_PER_CPU(struct mce, injectm); extern struct file_operations mce_chrdev_ops; #ifdef CONFIG_X86_MCE -extern void mcheck_init(struct cpuinfo_x86 *c); +void mcheck_init(struct cpuinfo_x86 *c); #else #define mcheck_init(c) do { } while (0) #endif -- cgit v1.2.2 From fee3c55d7fb9486f02d3285678d58e433ffe3c2a Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Fri, 29 May 2009 14:15:33 +0200 Subject: microblaze: Fix problem with early_printk in startup Problem arise when is incopatibility between kernel/dts/pvr and kernel tries to announce it. Early printk device (uartlite in our case) was in TLB 2 and when kernel extract DTB it necessary to allocate at least one TLB at the end of memory. First free TLB was number two where was early printk. But checking mechanism (kernel/dts/pvr) was after extrahing but TLB 2 was different. This caused that kernel hung up. Moving early printk device to TLB 63 solve it and we don't protect it which means that we can use early_printk messages only for initial parts of kernel then we rewrite TLB 63. Reported-by: Edgar E. Iglesias Signed-off-by: Michal Simek --- arch/microblaze/kernel/misc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/misc.S b/arch/microblaze/kernel/misc.S index d623efc9083c..df16c6287a8e 100644 --- a/arch/microblaze/kernel/misc.S +++ b/arch/microblaze/kernel/misc.S @@ -69,7 +69,7 @@ early_console_reg_tlb_alloc: * Load a TLB entry for the UART, so that microblaze_progress() can use * the UARTs nice and early. We use a 4k real==virtual mapping. */ - ori r4, r0, 2 + ori r4, r0, 63 mts rtlbx, r4 /* TLB slot 2 */ or r4,r5,r0 -- cgit v1.2.2 From 150c7e85526e80474b87004f4b420e8834fdeb43 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Sun, 29 Mar 2009 15:39:02 +0800 Subject: crypto: fpu - Add template for blkcipher touching FPU Blkcipher touching FPU need to be enclosed by kernel_fpu_begin() and kernel_fpu_end(). If they are invoked in cipher algorithm implementation, they will be invoked for each block, so that performance will be hurt, because they are "slow" operations. This patch implements "fpu" template, which makes these operations to be invoked for each request. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/fpu.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 arch/x86/crypto/fpu.c (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index ebe7deedd5b4..cfb0010fa940 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -2,6 +2,8 @@ # Arch-specific CryptoAPI modules. # +obj-$(CONFIG_CRYPTO_FPU) += fpu.o + obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c new file mode 100644 index 000000000000..5f9781a3815f --- /dev/null +++ b/arch/x86/crypto/fpu.c @@ -0,0 +1,166 @@ +/* + * FPU: Wrapper for blkcipher touching fpu + * + * Copyright (c) Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +struct crypto_fpu_ctx { + struct crypto_blkcipher *child; +}; + +static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key, + unsigned int keylen) +{ + struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent); + struct crypto_blkcipher *child = ctx->child; + int err; + + crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_blkcipher_setkey(child, key, keylen); + crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} + +static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + int err; + struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); + struct crypto_blkcipher *child = ctx->child; + struct blkcipher_desc desc = { + .tfm = child, + .info = desc_in->info, + .flags = desc_in->flags, + }; + + kernel_fpu_begin(); + err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes); + kernel_fpu_end(); + return err; +} + +static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + int err; + struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); + struct crypto_blkcipher *child = ctx->child; + struct blkcipher_desc desc = { + .tfm = child, + .info = desc_in->info, + .flags = desc_in->flags, + }; + + kernel_fpu_begin(); + err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes); + kernel_fpu_end(); + return err; +} + +static int crypto_fpu_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); + struct crypto_spawn *spawn = crypto_instance_ctx(inst); + struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_blkcipher *cipher; + + cipher = crypto_spawn_blkcipher(spawn); + if (IS_ERR(cipher)) + return PTR_ERR(cipher); + + ctx->child = cipher; + return 0; +} + +static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); + crypto_free_blkcipher(ctx->child); +} + +static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb) +{ + struct crypto_instance *inst; + struct crypto_alg *alg; + int err; + + err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + if (err) + return ERR_PTR(err); + + alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER, + CRYPTO_ALG_TYPE_MASK); + if (IS_ERR(alg)) + return ERR_CAST(alg); + + inst = crypto_alloc_instance("fpu", alg); + if (IS_ERR(inst)) + goto out_put_alg; + + inst->alg.cra_flags = alg->cra_flags; + inst->alg.cra_priority = alg->cra_priority; + inst->alg.cra_blocksize = alg->cra_blocksize; + inst->alg.cra_alignmask = alg->cra_alignmask; + inst->alg.cra_type = alg->cra_type; + inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize; + inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize; + inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize; + inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx); + inst->alg.cra_init = crypto_fpu_init_tfm; + inst->alg.cra_exit = crypto_fpu_exit_tfm; + inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey; + inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt; + inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt; + +out_put_alg: + crypto_mod_put(alg); + return inst; +} + +static void crypto_fpu_free(struct crypto_instance *inst) +{ + crypto_drop_spawn(crypto_instance_ctx(inst)); + kfree(inst); +} + +static struct crypto_template crypto_fpu_tmpl = { + .name = "fpu", + .alloc = crypto_fpu_alloc, + .free = crypto_fpu_free, + .module = THIS_MODULE, +}; + +static int __init crypto_fpu_module_init(void) +{ + return crypto_register_template(&crypto_fpu_tmpl); +} + +static void __exit crypto_fpu_module_exit(void) +{ + crypto_unregister_template(&crypto_fpu_tmpl); +} + +module_init(crypto_fpu_module_init); +module_exit(crypto_fpu_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("FPU block cipher wrapper"); -- cgit v1.2.2 From 2cf4ac8beb9dc50a315a6155b7b70e754d511958 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Sun, 29 Mar 2009 15:41:20 +0800 Subject: crypto: aes-ni - Add support for more modes Because kernel_fpu_begin() and kernel_fpu_end() operations are too slow, the performance gain of general mode implementation + aes-aesni is almost all compensated. The AES-NI support for more modes are implemented as follow: - Add a new AES algorithm implementation named __aes-aesni without kernel_fpu_begin/end() - Use fpu((AES)) to provide kenrel_fpu_begin/end() invoking - Add (AES) ablkcipher, which uses cryptd(fpu((AES))) to defer cryption to cryptd context in soft_irq context. Now the ctr, lrw, pcbc and xts support are added. Performance testing based on dm-crypt shows that cryption time can be reduced to 50% of general mode implementation + aes-aesni implementation. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 267 ++++++++++++++++++++++++++++++++++++- 1 file changed, 266 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 02af0af65497..4e663398f77f 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -21,6 +21,22 @@ #include #include +#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) +#define HAS_CTR +#endif + +#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) +#define HAS_LRW +#endif + +#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) +#define HAS_PCBC +#endif + +#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) +#define HAS_XTS +#endif + struct async_aes_ctx { struct cryptd_ablkcipher *cryptd_tfm; }; @@ -137,6 +153,41 @@ static struct crypto_alg aesni_alg = { } }; +static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + aesni_enc(ctx, dst, src); +} + +static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + aesni_dec(ctx, dst, src); +} + +static struct crypto_alg __aesni_alg = { + .cra_name = "__aes-aesni", + .cra_driver_name = "__driver-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(__aesni_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = __aes_encrypt, + .cia_decrypt = __aes_decrypt + } + } +}; + static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -277,8 +328,16 @@ static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int key_len) { struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; + int err; - return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len); + crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ablkcipher_setkey(child, key, key_len); + crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) + & CRYPTO_TFM_RES_MASK); + return err; } static int ablk_encrypt(struct ablkcipher_request *req) @@ -411,6 +470,163 @@ static struct crypto_alg ablk_cbc_alg = { }, }; +#ifdef HAS_CTR +static int ablk_ctr_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))", + 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_ctr_alg = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list), + .cra_init = ablk_ctr_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + .geniv = "chainiv", + }, + }, +}; +#endif + +#ifdef HAS_LRW +static int ablk_lrw_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))", + 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_lrw_alg = { + .cra_name = "lrw(aes)", + .cra_driver_name = "lrw-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list), + .cra_init = ablk_lrw_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, + .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; +#endif + +#ifdef HAS_PCBC +static int ablk_pcbc_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))", + 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_pcbc_alg = { + .cra_name = "pcbc(aes)", + .cra_driver_name = "pcbc-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_pcbc_alg.cra_list), + .cra_init = ablk_pcbc_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; +#endif + +#ifdef HAS_XTS +static int ablk_xts_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))", + 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_xts_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list), + .cra_init = ablk_xts_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; +#endif + static int __init aesni_init(void) { int err; @@ -421,6 +637,8 @@ static int __init aesni_init(void) } if ((err = crypto_register_alg(&aesni_alg))) goto aes_err; + if ((err = crypto_register_alg(&__aesni_alg))) + goto __aes_err; if ((err = crypto_register_alg(&blk_ecb_alg))) goto blk_ecb_err; if ((err = crypto_register_alg(&blk_cbc_alg))) @@ -429,9 +647,41 @@ static int __init aesni_init(void) goto ablk_ecb_err; if ((err = crypto_register_alg(&ablk_cbc_alg))) goto ablk_cbc_err; +#ifdef HAS_CTR + if ((err = crypto_register_alg(&ablk_ctr_alg))) + goto ablk_ctr_err; +#endif +#ifdef HAS_LRW + if ((err = crypto_register_alg(&ablk_lrw_alg))) + goto ablk_lrw_err; +#endif +#ifdef HAS_PCBC + if ((err = crypto_register_alg(&ablk_pcbc_alg))) + goto ablk_pcbc_err; +#endif +#ifdef HAS_XTS + if ((err = crypto_register_alg(&ablk_xts_alg))) + goto ablk_xts_err; +#endif return err; +#ifdef HAS_XTS +ablk_xts_err: +#endif +#ifdef HAS_PCBC + crypto_unregister_alg(&ablk_pcbc_alg); +ablk_pcbc_err: +#endif +#ifdef HAS_LRW + crypto_unregister_alg(&ablk_lrw_alg); +ablk_lrw_err: +#endif +#ifdef HAS_CTR + crypto_unregister_alg(&ablk_ctr_alg); +ablk_ctr_err: +#endif + crypto_unregister_alg(&ablk_cbc_alg); ablk_cbc_err: crypto_unregister_alg(&ablk_ecb_alg); ablk_ecb_err: @@ -439,6 +689,8 @@ ablk_ecb_err: blk_cbc_err: crypto_unregister_alg(&blk_ecb_alg); blk_ecb_err: + crypto_unregister_alg(&__aesni_alg); +__aes_err: crypto_unregister_alg(&aesni_alg); aes_err: return err; @@ -446,10 +698,23 @@ aes_err: static void __exit aesni_exit(void) { +#ifdef HAS_XTS + crypto_unregister_alg(&ablk_xts_alg); +#endif +#ifdef HAS_PCBC + crypto_unregister_alg(&ablk_pcbc_alg); +#endif +#ifdef HAS_LRW + crypto_unregister_alg(&ablk_lrw_alg); +#endif +#ifdef HAS_CTR + crypto_unregister_alg(&ablk_ctr_alg); +#endif crypto_unregister_alg(&ablk_cbc_alg); crypto_unregister_alg(&ablk_ecb_alg); crypto_unregister_alg(&blk_cbc_alg); crypto_unregister_alg(&blk_ecb_alg); + crypto_unregister_alg(&__aesni_alg); crypto_unregister_alg(&aesni_alg); } -- cgit v1.2.2 From 77bc7ac87d0d6df1ea099a44e8fc4e998e409606 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 1 Jun 2009 21:26:04 -0700 Subject: microblaze: remove unused flat_stack_align() definition Signed-off-by: Andrew Morton Signed-off-by: Michal Simek --- arch/microblaze/include/asm/flat.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h index acf0da543ef1..6847c1512c7b 100644 --- a/arch/microblaze/include/asm/flat.h +++ b/arch/microblaze/include/asm/flat.h @@ -13,7 +13,6 @@ #include -#define flat_stack_align(sp) /* nothing needed */ #define flat_argvp_envp_on_stack() 0 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) -- cgit v1.2.2 From 3447ef29a7f3b1fd0d8d58376950e695e04f6f8b Mon Sep 17 00:00:00 2001 From: John Williams Date: Tue, 24 Mar 2009 11:10:00 +1000 Subject: microblaze: Don't use access_ok for unaligned it assumes we have an unaligned exception handler which (a) may not be true (b) costs a lot of performance Instead we'll use struct/union method for big endian accesses, and byte-shifting for little endian. Signed-off-by: John Williams --- arch/microblaze/include/asm/unaligned.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/unaligned.h b/arch/microblaze/include/asm/unaligned.h index 9d66b640c910..3658d91ac0fb 100644 --- a/arch/microblaze/include/asm/unaligned.h +++ b/arch/microblaze/include/asm/unaligned.h @@ -12,7 +12,8 @@ # ifdef __KERNEL__ -# include +# include +# include # include # define get_unaligned __get_unaligned_be -- cgit v1.2.2 From c076b9937f4912ffc09b30e155fe5246f002f21a Mon Sep 17 00:00:00 2001 From: Ben Nizette Date: Wed, 3 Jun 2009 11:38:20 +0200 Subject: avr32: Fix clash in ATMEL_USART_ flags At the moment ATMEL_USART_{RTS,CTS,CLK} have the values 1, 2 and 3 respectively. Given these are used in bitmasks, trying to turn on the CLK line will in fact turn on the RTS and CTS lines as well. Change the value of ATMEL_USART_CLK to 4. Signed-off-by: Ben Nizette Signed-off-by: Haavard Skinnemoen --- arch/avr32/mach-at32ap/include/mach/board.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/avr32/mach-at32ap/include/mach/board.h b/arch/avr32/mach-at32ap/include/mach/board.h index 0b8164281899..ddedb471f33e 100644 --- a/arch/avr32/mach-at32ap/include/mach/board.h +++ b/arch/avr32/mach-at32ap/include/mach/board.h @@ -29,7 +29,7 @@ extern struct platform_device *atmel_default_console_device; /* Flags for selecting USART extra pins */ #define ATMEL_USART_RTS 0x01 #define ATMEL_USART_CTS 0x02 -#define ATMEL_USART_CLK 0x03 +#define ATMEL_USART_CLK 0x04 struct atmel_uart_data { short use_dma_tx; /* use transmit DMA? */ -- cgit v1.2.2 From c878b7d60418a45c36d99c2dc876ebb76035d404 Mon Sep 17 00:00:00 2001 From: Mark Jackson Date: Wed, 3 Jun 2009 11:16:38 +0100 Subject: Fix MIMC200 board LCD init This patch updates the LCD init code for the MIMC200 board. V2 fixes a .yres typo and corrects an incorrect setup value in the call to at32_add_device_lcdc() Without this patch, the lcd setup is wrong and won't display images correctly (if at all !!) Signed-off-by: Mark Jackson Signed-off-by: Haavard Skinnemoen --- arch/avr32/boards/mimc200/setup.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/avr32/boards/mimc200/setup.c b/arch/avr32/boards/mimc200/setup.c index c1b2175b4fea..523d8e183bef 100644 --- a/arch/avr32/boards/mimc200/setup.c +++ b/arch/avr32/boards/mimc200/setup.c @@ -43,16 +43,16 @@ unsigned long at32_board_osc_rates[3] = { /* Initialized by bootloader-specific startup code. */ struct tag *bootloader_tags __initdata; -static struct fb_videomode __initdata tx14d14_modes[] = { +static struct fb_videomode __initdata pt0434827_modes[] = { { - .name = "640x480 @ 60", - .refresh = 60, - .xres = 640, .yres = 480, - .pixclock = KHZ2PICOS(11666), + .name = "480x272 @ 72", + .refresh = 72, + .xres = 480, .yres = 272, + .pixclock = KHZ2PICOS(10000), - .left_margin = 80, .right_margin = 1, - .upper_margin = 13, .lower_margin = 2, - .hsync_len = 64, .vsync_len = 1, + .left_margin = 1, .right_margin = 1, + .upper_margin = 12, .lower_margin = 1, + .hsync_len = 42, .vsync_len = 1, .sync = 0, .vmode = FB_VMODE_NONINTERLACED, @@ -60,14 +60,14 @@ static struct fb_videomode __initdata tx14d14_modes[] = { }; static struct fb_monspecs __initdata mimc200_default_monspecs = { - .manufacturer = "HIT", - .monitor = "TX14D14VM1BAB", - .modedb = tx14d14_modes, - .modedb_len = ARRAY_SIZE(tx14d14_modes), + .manufacturer = "PT", + .monitor = "PT0434827-A401", + .modedb = pt0434827_modes, + .modedb_len = ARRAY_SIZE(pt0434827_modes), .hfmin = 14820, .hfmax = 22230, .vfmin = 60, - .vfmax = 73.3, + .vfmax = 85, .dclkmax = 25200000, }; @@ -228,7 +228,8 @@ static int __init mimc200_init(void) i2c_register_board_info(0, i2c_info, ARRAY_SIZE(i2c_info)); at32_add_device_lcdc(0, &mimc200_lcdc_data, - fbmem_start, fbmem_size, 1); + fbmem_start, fbmem_size, + ATMEL_LCDC_CONTROL | ATMEL_LCDC_ALT_CONTROL | ATMEL_LCDC_ALT_24B_DATA); return 0; } -- cgit v1.2.2 From 01ca79f1411eae2a45352709c838b946b1af9fbd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:52 +0200 Subject: x86, mce: add machine check exception count in /proc/interrupts Useful for debugging, but it's also good general policy to have a counter for all special interrupts there. This makes it easier to diagnose where a CPU is spending its time. [ Impact: feature, debugging tool ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 3 +++ arch/x86/kernel/cpu/mcheck/mce.c | 4 ++++ arch/x86/kernel/irq.c | 10 ++++++++++ 3 files changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ac6e0303bf21..1156dae295ac 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -89,6 +89,7 @@ struct mce_log { extern int mce_disabled; #include +#include void mce_setup(struct mce *m); void mce_log(struct mce *m); @@ -123,6 +124,8 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } int mce_available(struct cpuinfo_x86 *c); +DECLARE_PER_CPU(unsigned, mce_exception_count); + void mce_log_therm_throt_event(__u64 status); extern atomic_t mce_entry; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 1d0aa9c4e15b..287268d21836 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -57,6 +57,8 @@ int mce_disabled; atomic_t mce_entry; +DEFINE_PER_CPU(unsigned, mce_exception_count); + /* * Tolerant levels: * 0: always panic on uncorrected errors, log corrected errors @@ -359,6 +361,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) atomic_inc(&mce_entry); + __get_cpu_var(mce_exception_count)++; + if (notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL) == NOTIFY_STOP) goto out; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index a05660bf0299..05fc635c28c0 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -12,6 +12,7 @@ #include #include #include +#include atomic_t irq_err_count; @@ -93,6 +94,12 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); # endif +#endif +#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) + seq_printf(p, "%*s: ", prec, "MCE"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); + seq_printf(p, " Machine check exceptions\n"); #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) @@ -161,6 +168,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu) { u64 sum = irq_stats(cpu)->__nmi_count; +#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) + sum += per_cpu(mce_exception_count, cpu); +#endif #ifdef CONFIG_X86_LOCAL_APIC sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; -- cgit v1.2.2 From ca84f69697da0f004135e45b63ca560b6bd3554e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:57 +0200 Subject: x86, mce: add MCE poll count to /proc/interrupts Keep a count of the machine check polls (or CMCI events) in /proc/interrupts. Andi needs this for debugging, but it's also useful in general to see what's going in by the kernel. Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 4 ++++ arch/x86/kernel/irq.c | 4 ++++ 3 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 1156dae295ac..63abf3b19432 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -125,6 +125,7 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } int mce_available(struct cpuinfo_x86 *c); DECLARE_PER_CPU(unsigned, mce_exception_count); +DECLARE_PER_CPU(unsigned, mce_poll_count); void mce_log_therm_throt_event(__u64 status); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 287268d21836..784f6ae9d6f4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -264,6 +264,8 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) } } +DEFINE_PER_CPU(unsigned, mce_poll_count); + /* * Poll for corrected events or events that happened before reset. * Those are just logged through /dev/mcelog. @@ -275,6 +277,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) struct mce m; int i; + __get_cpu_var(mce_poll_count)++; + mce_setup(&m); m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 05fc635c28c0..eff46b5de62f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -100,6 +100,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); seq_printf(p, " Machine check exceptions\n"); + seq_printf(p, "%*s: ", prec, "MCP"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); + seq_printf(p, " Machine check polls\n"); #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) -- cgit v1.2.2 From f6fb0ac0869500323c78fa21992fe1933af61e91 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:55 +0200 Subject: x86, mce: store record length into memory struct mce anchor This makes it easier for tools who want to extract the mcelog out of crash images or memory dumps to adapt to changing struct mce size. The length field replaces padding, so it's fully compatible. Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 63abf3b19432..0a61946d4396 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -59,7 +59,7 @@ struct mce_log { unsigned len; /* = MCE_LOG_LEN */ unsigned next; unsigned flags; - unsigned pad0; + unsigned recordlen; /* length of struct mce */ struct mce entry[MCE_LOG_LEN]; }; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 784f6ae9d6f4..3db047e7a0fb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -108,8 +108,9 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm); */ static struct mce_log mcelog = { - MCE_LOG_SIGNATURE, - MCE_LOG_LEN, + .signature = MCE_LOG_SIGNATURE, + .len = MCE_LOG_LEN, + .recordlen = sizeof(struct mce), }; void mce_log(struct mce *mce) -- cgit v1.2.2 From d620c67fb92aa11736112f9a03e31d8e3079c57a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:56 +0200 Subject: x86, mce: support more than 256 CPUs in struct mce The old struct mce had a limitation to 256 CPUs. But x86 Linux supports more than that now with x2apic. Add a new field extcpu to report the extended number. Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 4 ++-- arch/x86/kernel/cpu/mcheck/mce-inject.c | 10 +++++----- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 0a61946d4396..b4a04b60b740 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -40,9 +40,9 @@ struct mce { __u64 res2; /* dito. */ __u8 cs; /* code segment */ __u8 bank; /* machine check bank */ - __u8 cpu; /* cpu that raised the error */ + __u8 cpu; /* cpu number; obsolete; use extcpu now */ __u8 finished; /* entry is valid */ - __u32 pad; + __u32 extcpu; /* linux cpu number that detected the error */ }; /* diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 7b3a5428396a..7d858fb4ce67 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -23,14 +23,14 @@ /* Update fake mce registers on current CPU. */ static void inject_mce(struct mce *m) { - struct mce *i = &per_cpu(injectm, m->cpu); + struct mce *i = &per_cpu(injectm, m->extcpu); /* Make sure noone reads partially written injectm */ i->finished = 0; mb(); m->finished = 0; /* First set the fields after finished */ - i->cpu = m->cpu; + i->extcpu = m->extcpu; mb(); /* Now write record in order, finished last (except above) */ memcpy(i, m, sizeof(struct mce)); @@ -49,7 +49,7 @@ static void raise_mce(unsigned long data) { struct delayed_mce *dm = (struct delayed_mce *)data; struct mce *m = &dm->m; - int cpu = m->cpu; + int cpu = m->extcpu; inject_mce(m); if (m->status & MCI_STATUS_UC) { @@ -93,7 +93,7 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, if (copy_from_user(&m, ubuf, usize)) return -EFAULT; - if (m.cpu >= num_possible_cpus() || !cpu_online(m.cpu)) + if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) return -EINVAL; dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL); @@ -108,7 +108,7 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, memcpy(&dm->m, &m, sizeof(struct mce)); setup_timer(&dm->timer, raise_mce, (unsigned long)dm); dm->timer.expires = jiffies + 2; - add_timer_on(&dm->timer, m.cpu); + add_timer_on(&dm->timer, m.extcpu); return usize; } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3db047e7a0fb..2c4dd6c422c3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -94,7 +94,7 @@ static inline int skip_bank_init(int i) void mce_setup(struct mce *m) { memset(m, 0, sizeof(struct mce)); - m->cpu = smp_processor_id(); + m->cpu = m->extcpu = smp_processor_id(); rdtscll(m->tsc); } @@ -158,7 +158,7 @@ static void print_mce(struct mce *m) KERN_EMERG "HARDWARE ERROR\n" KERN_EMERG "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", - m->cpu, m->mcgstatus, m->bank, m->status); + m->extcpu, m->mcgstatus, m->bank, m->status); if (m->ip) { printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", -- cgit v1.2.2 From 8ee08347c1e8b5680b3b3ce081e42e97bcaa1abe Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:56 +0200 Subject: x86, mce: extend struct mce user interface with more information. Experience has shown that struct mce which is used to pass an machine check to the user space daemon currently a few limitations. Also some data which is useful to print at panic level is also missing. This patch addresses most of them. The same information is also printed out together with mce panic. struct mce can be painlessly extended in a compatible way, the mcelog user space code just ignores additional fields with a warning. - It doesn't provide a wall time timestamp. There have been a few complaints about that. Fix that by adding a 64bit time_t - It doesn't provide the exact CPU identification. This makes it awkward for mcelog to decode the event correctly, especially when there are variations in the supported MCE codes on different CPU models or when mcelog is running on a different host after a panic. Previously the administrator had to specify the correct CPU when mcelog ran on a different host, but with the more variation in machine checks now it's better to auto detect that. It's also useful for more detailed analysis of CPU events. Pass CPUID 1.EAX and the cpu vendor (as encoded in processor.h) instead. - Socket ID and initial APIC ID are useful to report because they allow to identify the failing CPU in some (not all) cases. This is also especially useful for the panic situation. This addresses one of the complaints from Thomas Gleixner earlier. - The MCG capabilities MSR needs to be reported for some advanced error processing in mcelog Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 10 ++++++++-- arch/x86/kernel/cpu/mcheck/mce.c | 12 ++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index b4a04b60b740..ba1f8890cf51 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -36,13 +36,19 @@ struct mce { __u64 mcgstatus; __u64 ip; __u64 tsc; /* cpu time stamp counter */ - __u64 res1; /* for future extension */ - __u64 res2; /* dito. */ + __u64 time; /* wall time_t when error was detected */ + __u8 cpuvendor; /* cpu vendor as encoded in system.h */ + __u8 pad1; + __u16 pad2; + __u32 cpuid; /* CPUID 1 EAX */ __u8 cs; /* code segment */ __u8 bank; /* machine check bank */ __u8 cpu; /* cpu number; obsolete; use extcpu now */ __u8 finished; /* entry is valid */ __u32 extcpu; /* linux cpu number that detected the error */ + __u32 socketid; /* CPU socket ID */ + __u32 apicid; /* CPU initial apic ID */ + __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ }; /* diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2c4dd6c422c3..ba68449c22a1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -96,6 +96,15 @@ void mce_setup(struct mce *m) memset(m, 0, sizeof(struct mce)); m->cpu = m->extcpu = smp_processor_id(); rdtscll(m->tsc); + /* We hope get_seconds stays lockless */ + m->time = get_seconds(); + m->cpuvendor = boot_cpu_data.x86_vendor; + m->cpuid = cpuid_eax(1); +#ifdef CONFIG_SMP + m->socketid = cpu_data(m->extcpu).phys_proc_id; +#endif + m->apicid = cpu_data(m->extcpu).initial_apicid; + rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); } DEFINE_PER_CPU(struct mce, injectm); @@ -173,6 +182,9 @@ static void print_mce(struct mce *m) if (m->misc) printk("MISC %llx ", m->misc); printk("\n"); + printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + m->cpuvendor, m->cpuid, m->time, m->socketid, + m->apicid); printk(KERN_EMERG "This is not a software problem!\n"); printk(KERN_EMERG "Run through mcelog --ascii to decode " "and contact your hardware vendor\n"); -- cgit v1.2.2 From de8a84d85ad8bb46d01d72ebc57030b95075603c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:53 +0200 Subject: x86, mce: log corrected errors when panicing Normally the machine check handler ignores corrected errors and leaves them to machine_check_poll(). But when panicing mcp won't run, so log all errors. Note: this can still miss some cases until the "early no way out" patch later is applied too. Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ba68449c22a1..86806e52fc40 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -412,9 +412,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* * Non uncorrected errors are handled by machine_check_poll - * Leave them alone. + * Leave them alone, unless this panics. */ - if ((m.status & MCI_STATUS_UC) == 0) + if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out) continue; /* -- cgit v1.2.2 From a0189c70e5f17f4253dd7bc575c97469900e23d6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:54 +0200 Subject: x86, mce: remove TSC print heuristic Previously mce_panic used a simple heuristic to avoid printing old so far unreported machine check events on a mce panic. This worked by comparing the TSC value at the start of the machine check handler with the event time stamp and only printing newer ones. This has a couple of issues, in particular on systems where the TSC is not fully synchronized between CPUs it could lose events or print old ones. It is also problematic with full system synchronization as it is added by the next patch. Remove the TSC heuristic and instead replace it with a simple heuristic to print corrected errors first and after that uncorrected errors and finally the worst machine check as determined by the machine check handler. This simplifies the code because there is no need to pass the original TSC value around. Contains fixes from Ying Huang [ Impact: bug fix, cleanup ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Cc: Ying Huang Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 86806e52fc40..6773610061d8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -158,6 +158,7 @@ void mce_log(struct mce *mce) mcelog.entry[entry].finished = 1; wmb(); + mce->finished = 1; set_bit(0, ¬ify_user); } @@ -190,23 +191,29 @@ static void print_mce(struct mce *m) "and contact your hardware vendor\n"); } -static void mce_panic(char *msg, struct mce *backup, u64 start) +static void mce_panic(char *msg, struct mce *final) { int i; bust_spinlocks(1); console_verbose(); + /* First print corrected ones that are still unlogged */ for (i = 0; i < MCE_LOG_LEN; i++) { - u64 tsc = mcelog.entry[i].tsc; - - if ((s64)(tsc - start) < 0) + struct mce *m = &mcelog.entry[i]; + if ((m->status & MCI_STATUS_VAL) && + !(m->status & MCI_STATUS_UC)) + print_mce(m); + } + /* Now print uncorrected but with the final one last */ + for (i = 0; i < MCE_LOG_LEN; i++) { + struct mce *m = &mcelog.entry[i]; + if (!(m->status & MCI_STATUS_VAL)) continue; - print_mce(&mcelog.entry[i]); - if (backup && mcelog.entry[i].tsc == backup->tsc) - backup = NULL; + if (!final || memcmp(m, final, sizeof(struct mce))) + print_mce(m); } - if (backup) - print_mce(backup); + if (final) + print_mce(final); panic(msg); } @@ -362,7 +369,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) { struct mce m, panicm; int panicm_found = 0; - u64 mcestart = 0; int i; /* * If no_way_out gets set, there is no safe way to recover from this @@ -394,7 +400,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (!(m.mcgstatus & MCG_STATUS_RIPV)) no_way_out = 1; - rdtscll(mcestart); barrier(); for (i = 0; i < banks; i++) { @@ -478,7 +483,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * has not set tolerant to an insane level, give up and die. */ if (no_way_out && tolerant < 3) - mce_panic("Machine check", &panicm, mcestart); + mce_panic("Machine check", &panicm); /* * If the error seems to be unrecoverable, something should be @@ -506,8 +511,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (user_space) { force_sig(SIGBUS, current); } else if (panic_on_oops || tolerant < 2) { - mce_panic("Uncorrected machine check", - &panicm, mcestart); + mce_panic("Uncorrected machine check", &panicm); } } -- cgit v1.2.2 From 817f32d02a52dd7f5941534e0699883691e918df Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:54 +0200 Subject: x86, mce: add table driven machine check grading The machine check grading (as in deciding what should be done for a given register value) has to be done multiple times soon and it's also getting more complicated. So it makes sense to consolidate it into a single function. To get smaller and more straight forward and possibly more extensible code I opted towards a new table driven method. The various rules are put into a table when is then executed by a very simple interpreter. The grading engine is in a new file mce-severity.c. I also added a private include file mce-internal.h, because mce.h is already a bit too cluttered. This is dead code right now, but will be used in followon patches. Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/Makefile | 1 + arch/x86/kernel/cpu/mcheck/mce-internal.h | 10 +++++ arch/x86/kernel/cpu/mcheck/mce-severity.c | 61 +++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 arch/x86/kernel/cpu/mcheck/mce-internal.h create mode 100644 arch/x86/kernel/cpu/mcheck/mce-severity.c (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 60ee182c6c52..45004faf67ea 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,5 +1,6 @@ obj-y = mce.o therm_throt.o +obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h new file mode 100644 index 000000000000..f126b4ae7a25 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -0,0 +1,10 @@ +#include + +enum severity_level { + MCE_NO_SEVERITY, + MCE_SOME_SEVERITY, + MCE_UC_SEVERITY, + MCE_PANIC_SEVERITY, +}; + +int mce_severity(struct mce *a, int tolerant, char **msg); diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c new file mode 100644 index 000000000000..c189e89a89ae --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -0,0 +1,61 @@ +/* + * MCE grading rules. + * Copyright 2008, 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + * Author: Andi Kleen + */ +#include +#include + +#include "mce-internal.h" + +/* + * Grade an mce by severity. In general the most severe ones are processed + * first. Since there are quite a lot of combinations test the bits in a + * table-driven way. The rules are simply processed in order, first + * match wins. + */ + +static struct severity { + u64 mask; + u64 result; + unsigned char sev; + unsigned char mcgmask; + unsigned char mcgres; + char *msg; +} severities[] = { +#define SEV(s) .sev = MCE_ ## s ## _SEVERITY +#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } +#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } +#define MCGMASK(x, res, s, m, r...) \ + { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } + BITCLR(MCI_STATUS_VAL, NO, "Invalid"), + BITCLR(MCI_STATUS_EN, NO, "Not enabled"), + BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), + MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "No restart IP"), + BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"), + BITSET(MCI_STATUS_UC, UC, "Uncorrected"), + BITSET(0, SOME, "No match") /* always matches. keep at end */ +}; + +int mce_severity(struct mce *a, int tolerant, char **msg) +{ + struct severity *s; + for (s = severities;; s++) { + if ((a->status & s->mask) != s->result) + continue; + if ((a->mcgstatus & s->mcgmask) != s->mcgres) + continue; + if (s->sev > MCE_NO_SEVERITY && (a->status & MCI_STATUS_UC) && + tolerant < 1) + return MCE_PANIC_SEVERITY; + if (msg) + *msg = s->msg; + return s->sev; + } +} -- cgit v1.2.2 From bd19a5e6b73df276e1ccedf9059e9ee70c372d7d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:55 +0200 Subject: x86, mce: check early in exception handler if panic is needed The exception handler should behave differently if the exception is fatal versus one that can be returned from. In the first case it should never clear any registers because these need to be preserved for logging after the next boot. Otherwise it should clear them on each CPU step by step so that other CPUs sharing the same bank don't see duplicate events. Otherwise we risk reporting events multiple times on any CPUs which have shared machine check banks, which is a common problem on Intel Nehalem which has both SMT (two CPU threads sharing banks) and shared machine check banks in the uncore. Determine early in a special pass if any event requires a panic. This uses the mce_severity() function added earlier. This is needed for the next patch. Also fixes a problem together with an earlier patch that corrected events weren't logged on a fatal MCE. [ Impact: Feature ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 6773610061d8..5031814ac943 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -36,6 +36,7 @@ #include #include +#include "mce-internal.h" #include "mce.h" /* Handle unconfigured int18 (should never happen) */ @@ -191,7 +192,7 @@ static void print_mce(struct mce *m) "and contact your hardware vendor\n"); } -static void mce_panic(char *msg, struct mce *final) +static void mce_panic(char *msg, struct mce *final, char *exp) { int i; @@ -214,6 +215,8 @@ static void mce_panic(char *msg, struct mce *final) } if (final) print_mce(final); + if (exp) + printk(KERN_EMERG "Machine check: %s\n", exp); panic(msg); } @@ -357,6 +360,22 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) } EXPORT_SYMBOL_GPL(machine_check_poll); +/* + * Do a quick check if any of the events requires a panic. + * This decides if we keep the events around or clear them. + */ +static int mce_no_way_out(struct mce *m, char **msg) +{ + int i; + + for (i = 0; i < banks; i++) { + m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); + if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) + return 1; + } + return 0; +} + /* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. @@ -381,6 +400,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) */ int kill_it = 0; DECLARE_BITMAP(toclear, MAX_NR_BANKS); + char *msg = "Unknown"; atomic_inc(&mce_entry); @@ -395,10 +415,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_setup(&m); m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); - - /* if the restart IP is not valid, we're done for */ - if (!(m.mcgstatus & MCG_STATUS_RIPV)) - no_way_out = 1; + no_way_out = mce_no_way_out(&m, &msg); barrier(); @@ -430,18 +447,13 @@ void do_machine_check(struct pt_regs *regs, long error_code) __set_bit(i, toclear); if (m.status & MCI_STATUS_EN) { - /* if PCC was set, there's no way out */ - no_way_out |= !!(m.status & MCI_STATUS_PCC); /* * If this error was uncorrectable and there was * an overflow, we're in trouble. If no overflow, * we might get away with just killing a task. */ - if (m.status & MCI_STATUS_UC) { - if (tolerant < 1 || m.status & MCI_STATUS_OVER) - no_way_out = 1; + if (m.status & MCI_STATUS_UC) kill_it = 1; - } } else { /* * Machine check event was not enabled. Clear, but @@ -483,7 +495,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * has not set tolerant to an insane level, give up and die. */ if (no_way_out && tolerant < 3) - mce_panic("Machine check", &panicm); + mce_panic("Machine check", &panicm, msg); /* * If the error seems to be unrecoverable, something should be @@ -511,7 +523,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (user_space) { force_sig(SIGBUS, current); } else if (panic_on_oops || tolerant < 2) { - mce_panic("Uncorrected machine check", &panicm); + mce_panic("Uncorrected machine check", &panicm, msg); } } -- cgit v1.2.2 From ccc3c3192ae78dd56dcdf5353fd1a9ef5f9a3e2b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:54 +0200 Subject: x86, mce: implement bootstrapping for machine check wakeups Machine checks support waking up the mcelog daemon quickly. The original wake up code for this was pretty ugly, relying on a idle notifier and a special process flag. The reason it did it this way is that the machine check handler is not subject to normal interrupt locking rules so it's not safe to call wake_up(). Instead it set a process flag and then either did the wakeup in the syscall return or in the idle notifier. This patch adds a new "bootstraping" method as replacement. The idea is that the handler checks if it's in a state where it is unsafe to call wake_up(). If it's safe it calls it directly. When it's not safe -- that is it interrupted in a critical section with interrupts disables -- it uses a new "self IPI" to trigger an IPI to its own CPU. This can be done safely because IPI triggers are atomic with some care. The IPI is raised once the interrupts are reenabled and can then safely call wake_up(). When APICs are disabled the event is just queued and will be picked up eventually by the next polling timer. I think that's a reasonable compromise, since it should only happen quite rarely. Contains fixes from Ying Huang. [ solve conflict on irqinit, make it work on 32bit (entry_arch.h) - HS ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/entry_arch.h | 4 +++ arch/x86/include/asm/hw_irq.h | 1 + arch/x86/include/asm/irq_vectors.h | 5 ++++ arch/x86/kernel/cpu/mcheck/mce.c | 54 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/entry_64.S | 5 ++++ arch/x86/kernel/irqinit.c | 3 +++ 6 files changed, 72 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index b2eb9c066843..4cdcf5a3c96b 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -60,4 +60,8 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) #endif +#ifdef CONFIG_X86_NEW_MCE +BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR) +#endif + #endif diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index a7d14bbae110..4e59197e29ba 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -32,6 +32,7 @@ extern void error_interrupt(void); extern void spurious_interrupt(void); extern void thermal_interrupt(void); extern void reschedule_interrupt(void); +extern void mce_self_interrupt(void); extern void invalidate_interrupt(void); extern void invalidate_interrupt0(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 8c46b851296a..68f7cf84a333 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -117,6 +117,11 @@ */ #define GENERIC_INTERRUPT_VECTOR 0xed +/* + * Self IPI vector for machine checks + */ +#define MCE_SELF_VECTOR 0xeb + /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5031814ac943..121781627858 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,10 @@ #include #include +#include +#include #include +#include #include #include @@ -287,6 +291,54 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) } } +#ifdef CONFIG_X86_LOCAL_APIC +/* + * Called after interrupts have been reenabled again + * when a MCE happened during an interrupts off region + * in the kernel. + */ +asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) +{ + ack_APIC_irq(); + exit_idle(); + irq_enter(); + mce_notify_user(); + irq_exit(); +} +#endif + +static void mce_report_event(struct pt_regs *regs) +{ + if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { + mce_notify_user(); + return; + } + +#ifdef CONFIG_X86_LOCAL_APIC + /* + * Without APIC do not notify. The event will be picked + * up eventually. + */ + if (!cpu_has_apic) + return; + + /* + * When interrupts are disabled we cannot use + * kernel services safely. Trigger an self interrupt + * through the APIC to instead do the notification + * after interrupts are reenabled again. + */ + apic->send_IPI_self(MCE_SELF_VECTOR); + + /* + * Wait for idle afterwards again so that we don't leave the + * APIC in a non idle state because the normal APIC writes + * cannot exclude us. + */ + apic_wait_icr_idle(); +#endif +} + DEFINE_PER_CPU(unsigned, mce_poll_count); /* @@ -530,6 +582,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* notify userspace ASAP */ set_thread_flag(TIF_MCE_NOTIFY); + mce_report_event(regs); + /* the last thing we do is clear state */ for (i = 0; i < banks; i++) { if (test_bit(i, toclear)) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a31a7f29cffe..711c130a8411 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1011,6 +1011,11 @@ apicinterrupt THRESHOLD_APIC_VECTOR \ apicinterrupt THERMAL_APIC_VECTOR \ thermal_interrupt smp_thermal_interrupt +#ifdef CONFIG_X86_MCE +apicinterrupt MCE_SELF_VECTOR \ + mce_self_interrupt smp_mce_self_interrupt +#endif + #ifdef CONFIG_SMP apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ call_function_single_interrupt smp_call_function_single_interrupt diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index aab3d277766c..441f6ec6e9d4 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -187,6 +187,9 @@ static void __init apic_intr_init(void) #ifdef CONFIG_X86_THRESHOLD alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); #endif +#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) + alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); +#endif #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* self generated IPI for local APIC timer */ -- cgit v1.2.2 From f94b61c2c9fdcc90773c49df9ccf9ede3ad0d7db Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:55 +0200 Subject: x86, mce: implement panic synchronization In some circumstances multiple CPUs can enter mce_panic() in parallel. This gives quite confused output because they will all dump the same machine check buffer. The other problem is that they would all panic in parallel, but not process each other's shutdown IPIs because interrupts are disabled. Detect this situation early on in mce_panic(). On the first CPU entering will do the panic, the others will just wait to be killed. For paranoia reasons in case the other CPU dies during the MCE I added a 5 seconds timeout. If it expires each CPU will panic on its own again. Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 121781627858..421020f1d7db 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -196,10 +196,32 @@ static void print_mce(struct mce *m) "and contact your hardware vendor\n"); } +#define PANIC_TIMEOUT 5 /* 5 seconds */ + +static atomic_t mce_paniced; + +/* Panic in progress. Enable interrupts and wait for final IPI */ +static void wait_for_panic(void) +{ + long timeout = PANIC_TIMEOUT*USEC_PER_SEC; + preempt_disable(); + local_irq_enable(); + while (timeout-- > 0) + udelay(1); + panic("Panicing machine check CPU died"); +} + static void mce_panic(char *msg, struct mce *final, char *exp) { int i; + /* + * Make sure only one CPU runs in machine check panic + */ + if (atomic_add_return(1, &mce_paniced) > 1) + wait_for_panic(); + barrier(); + bust_spinlocks(1); console_verbose(); /* First print corrected ones that are still unlogged */ -- cgit v1.2.2 From 3c0797925f4ef9d55a32059d2af61a9c262e639d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:55 +0200 Subject: x86, mce: switch x86 machine check handler to Monarch election. On Intel platforms machine check exceptions are always broadcast to all CPUs. This patch makes the machine check handler synchronize all these machine checks, elect a Monarch to handle the event and collect the worst event from all CPUs and then process it first. This has some advantages: - When there is a truly data corrupting error the system panics as quickly as possible. This improves containment of corrupted data and makes sure the corrupted data never hits stable storage. - The panics are synchronized and do not reenter the panic code on multiple CPUs (which currently does not handle this well). - All the errors are reported. Currently it often happens that another CPU happens to do the panic first, but reports useless information (empty machine check) because the real error happened on another CPU which came in later. This is a big advantage on Nehalem where the 8 threads per CPU lead to often the wrong CPU winning the race and dumping useless information on a machine check. The problem also occurs in a less severe form on older CPUs. - The system can detect when no CPUs detected a machine check and shut down the system. This can happen when one CPU is so badly hung that that it cannot process a machine check anymore or when some external agent wants to stop the system by asserting the machine check pin. This follows Intel hardware recommendations. - This matches the recommended error model by the CPU designers. - The events can be output in true severity order - When a panic happens on another CPU it makes sure to be actually be able to process the stop IPI by enabling interrupts. The code is extremly careful to handle timeouts while waiting for other CPUs. It can't rely on the normal timing mechanisms (jiffies, ktime_get) because of its asynchronous/lockless nature, so it uses own timeouts using ndelay() and a "SPINUNIT" The timeout is configurable. By default it waits for upto one second for the other CPUs. This can be also disabled. From some informal testing AMD systems do not see to broadcast machine checks, so right now it's always disabled by default on non Intel CPUs or also on very old Intel systems. Includes fixes from Ying Huang Fixed a "ecception" in a comment (H.Seto) Moved global_nwo reset later based on suggestion from H.Seto v2: Avoid duplicate messages [ Impact: feature, fixes long standing problems. ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 360 +++++++++++++++++++++++++++++++++++---- 1 file changed, 331 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 421020f1d7db..ba431893e31d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +62,8 @@ int mce_disabled; #define MISC_MCELOG_MINOR 227 +#define SPINUNIT 100 /* 100ns */ + atomic_t mce_entry; DEFINE_PER_CPU(unsigned, mce_exception_count); @@ -77,6 +81,7 @@ static u64 *bank; static unsigned long notify_user; static int rip_msr; static int mce_bootlog = -1; +static int monarch_timeout = -1; static char trigger[128]; static char *trigger_argv[2] = { trigger, NULL }; @@ -84,6 +89,9 @@ static char *trigger_argv[2] = { trigger, NULL }; static unsigned long dont_init_banks; static DECLARE_WAIT_QUEUE_HEAD(mce_wait); +static DEFINE_PER_CPU(struct mce, mces_seen); +static int cpu_missing; + /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -241,6 +249,8 @@ static void mce_panic(char *msg, struct mce *final, char *exp) } if (final) print_mce(final); + if (cpu_missing) + printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); if (exp) printk(KERN_EMERG "Machine check: %s\n", exp); panic(msg); @@ -450,6 +460,264 @@ static int mce_no_way_out(struct mce *m, char **msg) return 0; } +/* + * Variable to establish order between CPUs while scanning. + * Each CPU spins initially until executing is equal its number. + */ +static atomic_t mce_executing; + +/* + * Defines order of CPUs on entry. First CPU becomes Monarch. + */ +static atomic_t mce_callin; + +/* + * Check if a timeout waiting for other CPUs happened. + */ +static int mce_timed_out(u64 *t) +{ + /* + * The others already did panic for some reason. + * Bail out like in a timeout. + * rmb() to tell the compiler that system_state + * might have been modified by someone else. + */ + rmb(); + if (atomic_read(&mce_paniced)) + wait_for_panic(); + if (!monarch_timeout) + goto out; + if ((s64)*t < SPINUNIT) { + /* CHECKME: Make panic default for 1 too? */ + if (tolerant < 1) + mce_panic("Timeout synchronizing machine check over CPUs", + NULL, NULL); + cpu_missing = 1; + return 1; + } + *t -= SPINUNIT; +out: + touch_nmi_watchdog(); + return 0; +} + +/* + * The Monarch's reign. The Monarch is the CPU who entered + * the machine check handler first. It waits for the others to + * raise the exception too and then grades them. When any + * error is fatal panic. Only then let the others continue. + * + * The other CPUs entering the MCE handler will be controlled by the + * Monarch. They are called Subjects. + * + * This way we prevent any potential data corruption in a unrecoverable case + * and also makes sure always all CPU's errors are examined. + * + * Also this detects the case of an machine check event coming from outer + * space (not detected by any CPUs) In this case some external agent wants + * us to shut down, so panic too. + * + * The other CPUs might still decide to panic if the handler happens + * in a unrecoverable place, but in this case the system is in a semi-stable + * state and won't corrupt anything by itself. It's ok to let the others + * continue for a bit first. + * + * All the spin loops have timeouts; when a timeout happens a CPU + * typically elects itself to be Monarch. + */ +static void mce_reign(void) +{ + int cpu; + struct mce *m = NULL; + int global_worst = 0; + char *msg = NULL; + char *nmsg = NULL; + + /* + * This CPU is the Monarch and the other CPUs have run + * through their handlers. + * Grade the severity of the errors of all the CPUs. + */ + for_each_possible_cpu(cpu) { + int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, + &nmsg); + if (severity > global_worst) { + msg = nmsg; + global_worst = severity; + m = &per_cpu(mces_seen, cpu); + } + } + + /* + * Cannot recover? Panic here then. + * This dumps all the mces in the log buffer and stops the + * other CPUs. + */ + if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) + mce_panic("Fatal machine check", m, msg); + + /* + * For UC somewhere we let the CPU who detects it handle it. + * Also must let continue the others, otherwise the handling + * CPU could deadlock on a lock. + */ + + /* + * No machine check event found. Must be some external + * source or one CPU is hung. Panic. + */ + if (!m && tolerant < 3) + mce_panic("Machine check from unknown source", NULL, NULL); + + /* + * Now clear all the mces_seen so that they don't reappear on + * the next mce. + */ + for_each_possible_cpu(cpu) + memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce)); +} + +static atomic_t global_nwo; + +/* + * Start of Monarch synchronization. This waits until all CPUs have + * entered the exception handler and then determines if any of them + * saw a fatal event that requires panic. Then it executes them + * in the entry order. + * TBD double check parallel CPU hotunplug + */ +static int mce_start(int no_way_out, int *order) +{ + int nwo; + int cpus = num_online_cpus(); + u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; + + if (!timeout) { + *order = -1; + return no_way_out; + } + + atomic_add(no_way_out, &global_nwo); + + /* + * Wait for everyone. + */ + while (atomic_read(&mce_callin) != cpus) { + if (mce_timed_out(&timeout)) { + atomic_set(&global_nwo, 0); + *order = -1; + return no_way_out; + } + ndelay(SPINUNIT); + } + + /* + * Cache the global no_way_out state. + */ + nwo = atomic_read(&global_nwo); + + /* + * Monarch starts executing now, the others wait. + */ + if (*order == 1) { + atomic_set(&mce_executing, 1); + return nwo; + } + + /* + * Now start the scanning loop one by one + * in the original callin order. + * This way when there are any shared banks it will + * be only seen by one CPU before cleared, avoiding duplicates. + */ + while (atomic_read(&mce_executing) < *order) { + if (mce_timed_out(&timeout)) { + atomic_set(&global_nwo, 0); + *order = -1; + return no_way_out; + } + ndelay(SPINUNIT); + } + return nwo; +} + +/* + * Synchronize between CPUs after main scanning loop. + * This invokes the bulk of the Monarch processing. + */ +static int mce_end(int order) +{ + int ret = -1; + u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; + + if (!timeout) + goto reset; + if (order < 0) + goto reset; + + /* + * Allow others to run. + */ + atomic_inc(&mce_executing); + + if (order == 1) { + /* CHECKME: Can this race with a parallel hotplug? */ + int cpus = num_online_cpus(); + + /* + * Monarch: Wait for everyone to go through their scanning + * loops. + */ + while (atomic_read(&mce_executing) <= cpus) { + if (mce_timed_out(&timeout)) + goto reset; + ndelay(SPINUNIT); + } + + mce_reign(); + barrier(); + ret = 0; + } else { + /* + * Subject: Wait for Monarch to finish. + */ + while (atomic_read(&mce_executing) != 0) { + if (mce_timed_out(&timeout)) + goto reset; + ndelay(SPINUNIT); + } + + /* + * Don't reset anything. That's done by the Monarch. + */ + return 0; + } + + /* + * Reset all global state. + */ +reset: + atomic_set(&global_nwo, 0); + atomic_set(&mce_callin, 0); + barrier(); + + /* + * Let others run again. + */ + atomic_set(&mce_executing, 0); + return ret; +} + +static void mce_clear_state(unsigned long *toclear) +{ + int i; + + for (i = 0; i < banks; i++) { + if (test_bit(i, toclear)) + mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + } +} + /* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. @@ -457,12 +725,23 @@ static int mce_no_way_out(struct mce *m, char **msg) * This is executed in NMI context not subject to normal locking rules. This * implies that most kernel services cannot be safely used. Don't even * think about putting a printk in there! + * + * On Intel systems this is entered on all CPUs in parallel through + * MCE broadcast. However some CPUs might be broken beyond repair, + * so be always careful when synchronizing with others. */ void do_machine_check(struct pt_regs *regs, long error_code) { - struct mce m, panicm; - int panicm_found = 0; + struct mce m, *final; int i; + int worst = 0; + int severity; + /* + * Establish sequential order between the CPUs entering the machine + * check handler. + */ + int order; + /* * If no_way_out gets set, there is no safe way to recover from this * MCE. If tolerant is cranked up, we'll try anyway. @@ -486,13 +765,23 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (!banks) goto out; + order = atomic_add_return(1, &mce_callin); mce_setup(&m); m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); no_way_out = mce_no_way_out(&m, &msg); + final = &__get_cpu_var(mces_seen); + *final = m; + barrier(); + /* + * Go through all the banks in exclusion of the other CPUs. + * This way we don't report duplicated events on shared banks + * because the first one to see it will clear it. + */ + no_way_out = mce_start(no_way_out, &order); for (i = 0; i < banks; i++) { __clear_bit(i, toclear); if (!bank[i]) @@ -544,32 +833,32 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_get_rip(&m, regs); mce_log(&m); - /* - * Did this bank cause the exception? - * - * Assume that the bank with uncorrectable errors did it, - * and that there is only a single one: - */ - if ((m.status & MCI_STATUS_UC) && - (m.status & MCI_STATUS_EN)) { - panicm = m; - panicm_found = 1; + severity = mce_severity(&m, tolerant, NULL); + if (severity > worst) { + *final = m; + worst = severity; } } + if (!no_way_out) + mce_clear_state(toclear); + /* - * If we didn't find an uncorrectable error, pick - * the last one (shouldn't happen, just being safe). + * Do most of the synchronization with other CPUs. + * When there's any problem use only local no_way_out state. */ - if (!panicm_found) - panicm = m; + if (mce_end(order) < 0) + no_way_out = worst >= MCE_PANIC_SEVERITY; /* * If we have decided that we just CAN'T continue, and the user * has not set tolerant to an insane level, give up and die. + * + * This is mainly used in the case when the system doesn't + * support MCE broadcasting or it has been disabled. */ if (no_way_out && tolerant < 3) - mce_panic("Machine check", &panicm, msg); + mce_panic("Machine check", final, msg); /* * If the error seems to be unrecoverable, something should be @@ -585,7 +874,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * instruction which caused the MCE. */ if (m.mcgstatus & MCG_STATUS_EIPV) - user_space = panicm.ip && (panicm.cs & 3); + user_space = final->ip && (final->cs & 3); /* * If we know that the error was in user space, send a @@ -597,20 +886,15 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (user_space) { force_sig(SIGBUS, current); } else if (panic_on_oops || tolerant < 2) { - mce_panic("Uncorrected machine check", &panicm, msg); + mce_panic("Uncorrected machine check", final, msg); } } /* notify userspace ASAP */ set_thread_flag(TIF_MCE_NOTIFY); - mce_report_event(regs); - - /* the last thing we do is clear state */ - for (i = 0; i < banks; i++) { - if (test_bit(i, toclear)) - mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } + if (worst > 0) + mce_report_event(regs); mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); out: atomic_dec(&mce_entry); @@ -821,7 +1105,17 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model < 0x1A) __set_bit(0, &dont_init_banks); + + /* + * All newer Intel systems support MCE broadcasting. Enable + * synchronization with a one second timeout. + */ + if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && + monarch_timeout < 0) + monarch_timeout = USEC_PER_SEC; } + if (monarch_timeout < 0) + monarch_timeout = 0; } static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) @@ -1068,7 +1362,9 @@ static struct miscdevice mce_log_device = { /* * mce=off disables machine check - * mce=TOLERANCELEVEL (number, see above) + * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) + * monarchtimeout is how long to wait for other CPUs on machine + * check, or 0 to not wait * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. * mce=nobootlog Don't log MCEs from before booting. */ @@ -1082,9 +1378,13 @@ static int __init mcheck_enable(char *str) mce_disabled = 1; else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) mce_bootlog = (str[0] == 'b'); - else if (isdigit(str[0])) + else if (isdigit(str[0])) { get_option(&str, &tolerant); - else { + if (*str == ',') { + ++str; + get_option(&str, &monarch_timeout); + } + } else { printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", str); return 0; @@ -1221,6 +1521,7 @@ static ssize_t store_int_with_restart(struct sys_device *s, static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); +static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); static struct sysdev_ext_attribute attr_check_interval = { _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, @@ -1230,6 +1531,7 @@ static struct sysdev_ext_attribute attr_check_interval = { static struct sysdev_attribute *mce_attrs[] = { &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, + &attr_monarch_timeout.attr, NULL }; -- cgit v1.2.2 From ac9603754dc7e286e62ae4f1067958d5b0075f99 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:58 +0200 Subject: x86, mce: make non Monarch panic message "Fatal machine check" too ... instead of "Machine check". This is for consistency with the Monarch panic message. Based on a report from Ying Huang. v2: But add a descriptive postfix so that the test suite can distingush. Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ba431893e31d..d5cb0b4c17ff 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -554,7 +554,7 @@ static void mce_reign(void) * other CPUs. */ if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) - mce_panic("Fatal machine check", m, msg); + mce_panic("Fatal Machine check", m, msg); /* * For UC somewhere we let the CPU who detects it handle it. @@ -858,7 +858,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * support MCE broadcasting or it has been disabled. */ if (no_way_out && tolerant < 3) - mce_panic("Machine check", final, msg); + mce_panic("Fatal machine check on current CPU", final, msg); /* * If the error seems to be unrecoverable, something should be -- cgit v1.2.2 From 1b2797dcc9f0ad89bc382ace26c6baafbc7e33c2 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 27 May 2009 21:56:51 +0200 Subject: x86, mce: improve mce_get_rip Assume IP on the stack is valid when either EIPV or RIPV are set. This influences whether the machine check exception handler decides to return or panic. This fixes a test case in the mce-test suite and is more compliant to the specification. This currently only makes a difference in a artificial testing scenario with the mce-test test suite. Also in addition do not force the EIPV to be valid with the exact register MSRs, and keep in trust the CS value on stack even if MSR is available. [AK: combination of patches from Huang Ying and Hidetoshi Seto, with new description by me] [add some description, no code changed - HS] Signed-off-by: Huang Ying Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d5cb0b4c17ff..a7dc369a9974 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -306,21 +306,22 @@ int mce_available(struct cpuinfo_x86 *c) return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } +/* + * Get the address of the instruction at the time of the machine check + * error. + */ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) { - if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { + + if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) { m->ip = regs->ip; m->cs = regs->cs; } else { m->ip = 0; m->cs = 0; } - if (rip_msr) { - /* Assume the RIP in the MSR is exact. Is this true? */ - m->mcgstatus |= MCG_STATUS_EIPV; + if (rip_msr) m->ip = mce_rdmsrl(rip_msr); - m->cs = 0; - } } #ifdef CONFIG_X86_LOCAL_APIC -- cgit v1.2.2 From 29b0f591d678838435fbb3e15ef20266f1a9e01d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:56 +0200 Subject: x86, mce: default to panic timeout for machine checks Fatal machine checks can be logged to disk after boot, but only if the system did a warm reboot. That's unfortunately difficult with the default panic behaviour, which waits forever and the admin has to press the power button because modern systems usually miss a reset button. This clears the machine checks in the registers and make it impossible to log them. This patch changes the default for machine check panic to always reboot after 30s. Then the mce can be successfully logged after reboot. I believe this will improve machine check experience for any system running the X server. This is dependent on successfull boot logging of MCEs. This currently only works on Intel systems, on AMD there are quite a lot of systems around which leave junk in the machine check registers after boot, so it's disabled here. These systems will continue to default to endless waiting panic. v2: Only force panic timeout when it's shorter (H.Seto) v3: Only force timeout when there is no timeout (based on comment H.Seto) [ Fix changelog - HS ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a7dc369a9974..79d243145b8f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -82,6 +82,7 @@ static unsigned long notify_user; static int rip_msr; static int mce_bootlog = -1; static int monarch_timeout = -1; +static int mce_panic_timeout; static char trigger[128]; static char *trigger_argv[2] = { trigger, NULL }; @@ -216,6 +217,8 @@ static void wait_for_panic(void) local_irq_enable(); while (timeout-- > 0) udelay(1); + if (panic_timeout == 0) + panic_timeout = mce_panic_timeout; panic("Panicing machine check CPU died"); } @@ -253,6 +256,8 @@ static void mce_panic(char *msg, struct mce *final, char *exp) printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); if (exp) printk(KERN_EMERG "Machine check: %s\n", exp); + if (panic_timeout == 0) + panic_timeout = mce_panic_timeout; panic(msg); } @@ -1117,6 +1122,8 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) } if (monarch_timeout < 0) monarch_timeout = 0; + if (mce_bootlog != 0) + mce_panic_timeout = 30; } static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) -- cgit v1.2.2 From 86503560e48153aba539ff117450d31ab2ef76d7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:58 +0200 Subject: x86, mce: print header/footer only once for multiple MCEs When multiple MCEs are printed print the "HARDWARE ERROR" header and "This is not a software error" footer only once. This makes the output much more compact with many CPUs. Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 79d243145b8f..ff9c732989de 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -176,11 +176,13 @@ void mce_log(struct mce *mce) set_bit(0, ¬ify_user); } -static void print_mce(struct mce *m) +static void print_mce(struct mce *m, int *first) { - printk(KERN_EMERG "\n" - KERN_EMERG "HARDWARE ERROR\n" - KERN_EMERG + if (*first) { + printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n"); + *first = 0; + } + printk(KERN_EMERG "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->extcpu, m->mcgstatus, m->bank, m->status); if (m->ip) { @@ -200,9 +202,12 @@ static void print_mce(struct mce *m) printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); - printk(KERN_EMERG "This is not a software problem!\n"); - printk(KERN_EMERG "Run through mcelog --ascii to decode " - "and contact your hardware vendor\n"); +} + +static void print_mce_tail(void) +{ + printk(KERN_EMERG "This is not a software problem!\n" + KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ @@ -225,6 +230,7 @@ static void wait_for_panic(void) static void mce_panic(char *msg, struct mce *final, char *exp) { int i; + int first = 1; /* * Make sure only one CPU runs in machine check panic @@ -240,7 +246,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) struct mce *m = &mcelog.entry[i]; if ((m->status & MCI_STATUS_VAL) && !(m->status & MCI_STATUS_UC)) - print_mce(m); + print_mce(m, &first); } /* Now print uncorrected but with the final one last */ for (i = 0; i < MCE_LOG_LEN; i++) { @@ -248,12 +254,13 @@ static void mce_panic(char *msg, struct mce *final, char *exp) if (!(m->status & MCI_STATUS_VAL)) continue; if (!final || memcmp(m, final, sizeof(struct mce))) - print_mce(m); + print_mce(m, &first); } if (final) - print_mce(final); + print_mce(final, &first); if (cpu_missing) printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); + print_mce_tail(); if (exp) printk(KERN_EMERG "Machine check: %s\n", exp); if (panic_timeout == 0) -- cgit v1.2.2 From ed7290d0ee8f81aa78bfe816f01b012f208cafc5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:57 +0200 Subject: x86, mce: implement new status bits The x86 architecture recently added some new machine check status bits: S(ignalled) and AR (Action-Required). Signalled allows to check if a specific event caused an exception or was just logged through CMCI. AR allows the kernel to decide if an event needs immediate action or can be delayed or ignored. Implement support for these new status bits. mce_severity() uses the new bits to grade the machine check correctly and decide what to do. The exception handler uses AR to decide to kill or not. The S bit is used to separate events between the poll/CMCI handler and the exception handler. Classical UC always leads to panic. That was true before anyways because the existing CPUs always passed a PCC with it. Also corrects the rules whether to kill in user or kernel context and how to handle missing RIPV. The machine check handler largely uses the mce-severity grading engine now instead of making its own decisions. This means the logic is centralized in one place. This is useful because it has to be evaluated multiple times. v2: Some rule fixes; Add AO events Fix RIPV, RIPV|EIPV order (Ying Huang) Fix UCNA with AR=1 message (Ying Huang) Add comment about panicing in m_c_p. Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 10 ++++ arch/x86/kernel/cpu/mcheck/mce-internal.h | 5 ++ arch/x86/kernel/cpu/mcheck/mce-severity.c | 82 ++++++++++++++++++++++++++++-- arch/x86/kernel/cpu/mcheck/mce.c | 84 ++++++++++++++++--------------- 4 files changed, 137 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ba1f8890cf51..afd3cdf6f8ad 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -15,6 +15,7 @@ #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ #define MCG_EXT_CNT_SHIFT 16 #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) +#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ @@ -27,6 +28,15 @@ #define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ #define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ +#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ +#define MCI_STATUS_AR (1ULL<<55) /* Action required */ + +/* MISC register defines */ +#define MCM_ADDR_SEGOFF 0 /* segment offset */ +#define MCM_ADDR_LINEAR 1 /* linear address */ +#define MCM_ADDR_PHYS 2 /* physical address */ +#define MCM_ADDR_MEM 3 /* memory address */ +#define MCM_ADDR_GENERIC 7 /* generic */ /* Fields are zero when not available */ struct mce { diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index f126b4ae7a25..54dcb8ff12e5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -2,9 +2,14 @@ enum severity_level { MCE_NO_SEVERITY, + MCE_KEEP_SEVERITY, MCE_SOME_SEVERITY, + MCE_AO_SEVERITY, MCE_UC_SEVERITY, + MCE_AR_SEVERITY, MCE_PANIC_SEVERITY, }; int mce_severity(struct mce *a, int tolerant, char **msg); + +extern int mce_ser; diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index c189e89a89ae..4f4d2caf4043 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -19,43 +19,117 @@ * first. Since there are quite a lot of combinations test the bits in a * table-driven way. The rules are simply processed in order, first * match wins. + * + * Note this is only used for machine check exceptions, the corrected + * errors use much simpler rules. The exceptions still check for the corrected + * errors, but only to leave them alone for the CMCI handler (except for + * panic situations) */ +enum context { IN_KERNEL = 1, IN_USER = 2 }; +enum ser { SER_REQUIRED = 1, NO_SER = 2 }; + static struct severity { u64 mask; u64 result; unsigned char sev; unsigned char mcgmask; unsigned char mcgres; + unsigned char ser; + unsigned char context; char *msg; } severities[] = { +#define KERNEL .context = IN_KERNEL +#define USER .context = IN_USER +#define SER .ser = SER_REQUIRED +#define NOSER .ser = NO_SER #define SEV(s) .sev = MCE_ ## s ## _SEVERITY #define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } #define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } #define MCGMASK(x, res, s, m, r...) \ { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } +#define MASK(x, y, s, m, r...) \ + { .mask = x, .result = y, SEV(s), .msg = m, ## r } +#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) +#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) +#define MCACOD 0xffff + BITCLR(MCI_STATUS_VAL, NO, "Invalid"), BITCLR(MCI_STATUS_EN, NO, "Not enabled"), BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), - MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "No restart IP"), + /* When MCIP is not set something is very confused */ + MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"), + /* Neither return not error IP -- no chance to recover -> PANIC */ + MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC, + "Neither restart nor error IP"), + MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP", + KERNEL), + BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER), + MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME, + "Spurious not enabled", SER), + + /* ignore OVER for UCNA */ + MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP, + "Uncorrected no action required", SER), + MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC, + "Illegal combination (UCNA with AR=1)", SER), + MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER), + + /* AR add known MCACODs here */ + MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC, + "Action required with lost events", SER), + MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC, + "Action required; unknown MCACOD", SER), + + /* known AO MCACODs: */ + MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO, + "Action optional: memory scrubbing error", SER), + MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO, + "Action optional: last level cache writeback error", SER), + + MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME, + "Action optional unknown MCACOD", SER), + MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME, + "Action optional with lost events", SER), BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"), BITSET(MCI_STATUS_UC, UC, "Uncorrected"), BITSET(0, SOME, "No match") /* always matches. keep at end */ }; +/* + * If the EIPV bit is set, it means the saved IP is the + * instruction which caused the MCE. + */ +static int error_context(struct mce *m) +{ + if (m->mcgstatus & MCG_STATUS_EIPV) + return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; + /* Unknown, assume kernel */ + return IN_KERNEL; +} + int mce_severity(struct mce *a, int tolerant, char **msg) { + enum context ctx = error_context(a); struct severity *s; + for (s = severities;; s++) { if ((a->status & s->mask) != s->result) continue; if ((a->mcgstatus & s->mcgmask) != s->mcgres) continue; - if (s->sev > MCE_NO_SEVERITY && (a->status & MCI_STATUS_UC) && - tolerant < 1) - return MCE_PANIC_SEVERITY; + if (s->ser == SER_REQUIRED && !mce_ser) + continue; + if (s->ser == NO_SER && mce_ser) + continue; + if (s->context && ctx != s->context) + continue; if (msg) *msg = s->msg; + if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { + if (panic_on_oops || tolerant < 1) + return MCE_PANIC_SEVERITY; + } return s->sev; } } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ff9c732989de..f051a7807ab4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -83,6 +83,7 @@ static int rip_msr; static int mce_bootlog = -1; static int monarch_timeout = -1; static int mce_panic_timeout; +int mce_ser; static char trigger[128]; static char *trigger_argv[2] = { trigger, NULL }; @@ -391,6 +392,15 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); * Those are just logged through /dev/mcelog. * * This is executed in standard interrupt context. + * + * Note: spec recommends to panic for fatal unsignalled + * errors here. However this would be quite problematic -- + * we would need to reimplement the Monarch handling and + * it would mess up the exclusion between exception handler + * and poll hander -- * so we skip this for now. + * These cases should not happen anyways, or only when the CPU + * is already totally * confused. In this case it's likely it will + * not fully execute the machine check handler either. */ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { @@ -417,13 +427,13 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) continue; /* - * Uncorrected events are handled by the exception handler - * when it is enabled. But when the exception is disabled log - * everything. + * Uncorrected or signalled events are handled by the exception + * handler when it is enabled, so don't process those here. * * TBD do the same check for MCI_STATUS_EN here? */ - if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) + if (!(flags & MCP_UC) && + (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) continue; if (m.status & MCI_STATUS_MISCV) @@ -789,6 +799,12 @@ void do_machine_check(struct pt_regs *regs, long error_code) barrier(); + /* + * When no restart IP must always kill or panic. + */ + if (!(m.mcgstatus & MCG_STATUS_RIPV)) + kill_it = 1; + /* * Go through all the banks in exclusion of the other CPUs. * This way we don't report duplicated events on shared banks @@ -809,10 +825,11 @@ void do_machine_check(struct pt_regs *regs, long error_code) continue; /* - * Non uncorrected errors are handled by machine_check_poll - * Leave them alone, unless this panics. + * Non uncorrected or non signaled errors are handled by + * machine_check_poll. Leave them alone, unless this panics. */ - if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out) + if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && + !no_way_out) continue; /* @@ -820,17 +837,16 @@ void do_machine_check(struct pt_regs *regs, long error_code) */ add_taint(TAINT_MACHINE_CHECK); - __set_bit(i, toclear); + severity = mce_severity(&m, tolerant, NULL); - if (m.status & MCI_STATUS_EN) { - /* - * If this error was uncorrectable and there was - * an overflow, we're in trouble. If no overflow, - * we might get away with just killing a task. - */ - if (m.status & MCI_STATUS_UC) - kill_it = 1; - } else { + /* + * When machine check was for corrected handler don't touch, + * unless we're panicing. + */ + if (severity == MCE_KEEP_SEVERITY && !no_way_out) + continue; + __set_bit(i, toclear); + if (severity == MCE_NO_SEVERITY) { /* * Machine check event was not enabled. Clear, but * ignore. @@ -838,6 +854,12 @@ void do_machine_check(struct pt_regs *regs, long error_code) continue; } + /* + * Kill on action required. + */ + if (severity == MCE_AR_SEVERITY) + kill_it = 1; + if (m.status & MCI_STATUS_MISCV) m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); if (m.status & MCI_STATUS_ADDRV) @@ -846,7 +868,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_get_rip(&m, regs); mce_log(&m); - severity = mce_severity(&m, tolerant, NULL); if (severity > worst) { *final = m; worst = severity; @@ -879,29 +900,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) * one task, do that. If the user has set the tolerance very * high, don't try to do anything at all. */ - if (kill_it && tolerant < 3) { - int user_space = 0; - - /* - * If the EIPV bit is set, it means the saved IP is the - * instruction which caused the MCE. - */ - if (m.mcgstatus & MCG_STATUS_EIPV) - user_space = final->ip && (final->cs & 3); - /* - * If we know that the error was in user space, send a - * SIGBUS. Otherwise, panic if tolerance is low. - * - * force_sig() takes an awful lot of locks and has a slight - * risk of deadlocking. - */ - if (user_space) { - force_sig(SIGBUS, current); - } else if (panic_on_oops || tolerant < 2) { - mce_panic("Uncorrected machine check", final, msg); - } - } + if (kill_it && tolerant < 3) + force_sig(SIGBUS, current); /* notify userspace ASAP */ set_thread_flag(TIF_MCE_NOTIFY); @@ -1049,6 +1050,9 @@ static int mce_cap_init(void) if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) rip_msr = MSR_IA32_MCG_EIP; + if (cap & MCG_SER_P) + mce_ser = 1; + return 0; } -- cgit v1.2.2 From 4611a6fa4b37cf6b8b6066ed0d605c994c62a1a0 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 27 May 2009 21:56:57 +0200 Subject: x86, mce: export MCE severities coverage via debugfs The MCE severity judgement code is data-driven, so code coverage tools such as gcov can not be used for measuring coverage. Instead a dedicated coverage mechanism is implemented. The kernel keeps track of rules executed and reports them in debugfs. This is useful for increasing coverage of the mce-test testsuite. Right now it's unconditionally enabled because it's very little code. Signed-off-by: Huang Ying Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce-severity.c | 83 +++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 4f4d2caf4043..ff0807f97056 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -10,6 +10,9 @@ * Author: Andi Kleen */ #include +#include +#include +#include #include #include "mce-internal.h" @@ -37,6 +40,7 @@ static struct severity { unsigned char mcgres; unsigned char ser; unsigned char context; + unsigned char covered; char *msg; } severities[] = { #define KERNEL .context = IN_KERNEL @@ -126,6 +130,7 @@ int mce_severity(struct mce *a, int tolerant, char **msg) continue; if (msg) *msg = s->msg; + s->covered = 1; if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { if (panic_on_oops || tolerant < 1) return MCE_PANIC_SEVERITY; @@ -133,3 +138,81 @@ int mce_severity(struct mce *a, int tolerant, char **msg) return s->sev; } } + +static void *s_start(struct seq_file *f, loff_t *pos) +{ + if (*pos >= ARRAY_SIZE(severities)) + return NULL; + return &severities[*pos]; +} + +static void *s_next(struct seq_file *f, void *data, loff_t *pos) +{ + if (++(*pos) >= ARRAY_SIZE(severities)) + return NULL; + return &severities[*pos]; +} + +static void s_stop(struct seq_file *f, void *data) +{ +} + +static int s_show(struct seq_file *f, void *data) +{ + struct severity *ser = data; + seq_printf(f, "%d\t%s\n", ser->covered, ser->msg); + return 0; +} + +static const struct seq_operations severities_seq_ops = { + .start = s_start, + .next = s_next, + .stop = s_stop, + .show = s_show, +}; + +static int severities_coverage_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &severities_seq_ops); +} + +static ssize_t severities_coverage_write(struct file *file, + const char __user *ubuf, + size_t count, loff_t *ppos) +{ + int i; + for (i = 0; i < ARRAY_SIZE(severities); i++) + severities[i].covered = 0; + return count; +} + +static const struct file_operations severities_coverage_fops = { + .open = severities_coverage_open, + .release = seq_release, + .read = seq_read, + .write = severities_coverage_write, +}; + +static int __init severities_debugfs_init(void) +{ + struct dentry *dmce = NULL, *fseverities_coverage = NULL; + + dmce = debugfs_create_dir("mce", NULL); + if (dmce == NULL) + goto err_out; + fseverities_coverage = debugfs_create_file("severities-coverage", + 0444, dmce, NULL, + &severities_coverage_fops); + if (fseverities_coverage == NULL) + goto err_out; + + return 0; + +err_out: + if (fseverities_coverage) + debugfs_remove(fseverities_coverage); + if (dmce) + debugfs_remove(dmce); + return -ENOMEM; +} +late_initcall(severities_debugfs_init); -- cgit v1.2.2 From 4ef702c10b5df18ab04921fc252c26421d4d6c75 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:52 +0200 Subject: x86: fix panic with interrupts off (needed for MCE) For some time each panic() called with interrupts disabled triggered the !irqs_disabled() WARN_ON in smp_call_function(), producing ugly backtraces and confusing users. This is a common situation with machine checks for example which tend to call panic with interrupts disabled, but will also hit in other situations e.g. panic during early boot. In fact it means that panic cannot be called in many circumstances, which would be bad. This all started with the new fancy queued smp_call_function, which is then used by the shutdown path to shut down the other CPUs. On closer examination it turned out that the fancy RCU smp_call_function() does lots of things not suitable in a panic situation anyways, like allocating memory and relying on complex system state. I originally tried to patch this over by checking for panic there, but it was quite complicated and the original patch was also not very popular. This also didn't fix some of the underlying complexity problems. The new code in post 2.6.29 tries to patch around this by checking for oops_in_progress, but that is not enough to make this fully safe and I don't think that's a real solution because panic has to be reliable. So instead use an own vector to reboot. This makes the reboot code extremly straight forward, which is definitely a big plus in a panic situation where it is important to avoid relying on too much kernel state. The new simple code is also safe to be called from interupts off region because it is very very simple. There can be situations where it is important that panic is reliable. For example on a fatal machine check the panic is needed to get the system up again and running as quickly as possible. So it's important that panic is reliable and all function it calls simple. This is why I came up with this simple vector scheme. It's very hard to beat in simplicity. Vectors are not particularly precious anymore since all big systems are using per CPU vectors. Another possibility would have been to use an NMI similar to kdump, but there is still the problem that NMIs don't work reliably on some systems due to BIOS issues. NMIs would have been able to stop CPUs running with interrupts off too. In the sake of universal reliability I opted for using a non NMI vector for now. I put the reboot vector into the highest priority bucket of the APIC vectors and moved the 64bit UV_BAU message down instead into the next lower priority. [ Impact: bug fix, fixes an old regression ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/entry_arch.h | 1 + arch/x86/include/asm/hw_irq.h | 1 + arch/x86/include/asm/irq_vectors.h | 9 +++------ arch/x86/kernel/entry_64.S | 2 ++ arch/x86/kernel/irqinit.c | 3 +++ arch/x86/kernel/smp.c | 28 +++++++++++++++++++++++++++- 6 files changed, 37 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 4cdcf5a3c96b..69f886805ecb 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) +BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, smp_invalidate_interrupt) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 4e59197e29ba..1c8f28a63058 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -45,6 +45,7 @@ extern void invalidate_interrupt6(void); extern void invalidate_interrupt7(void); extern void irq_move_cleanup_interrupt(void); +extern void reboot_interrupt(void); extern void threshold_interrupt(void); extern void call_function_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 68f7cf84a333..28477e4f2d49 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -88,12 +88,7 @@ #define CALL_FUNCTION_SINGLE_VECTOR 0xfb #define THERMAL_APIC_VECTOR 0xfa #define THRESHOLD_APIC_VECTOR 0xf9 - -#ifdef CONFIG_X86_32 -/* 0xf8 : free */ -#else -# define UV_BAU_MESSAGE 0xf8 -#endif +#define REBOOT_VECTOR 0xf8 /* f0-f7 used for spreading out TLB flushes: */ #define INVALIDATE_TLB_VECTOR_END 0xf7 @@ -117,6 +112,8 @@ */ #define GENERIC_INTERRUPT_VECTOR 0xed +#define UV_BAU_MESSAGE 0xec + /* * Self IPI vector for machine checks */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 711c130a8411..4234b1235652 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -976,6 +976,8 @@ END(\sym) #ifdef CONFIG_SMP apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt +apicinterrupt REBOOT_VECTOR \ + reboot_interrupt smp_reboot_interrupt #endif #ifdef CONFIG_X86_UV diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 441f6ec6e9d4..4a69ec55be3d 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -173,6 +173,9 @@ static void __init smp_intr_init(void) /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); + + /* IPI used for rebooting/stopping */ + alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); #endif #endif /* CONFIG_SMP */ } diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index f6db48c405b8..bf1831aa14fa 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -150,14 +150,40 @@ void native_send_call_func_ipi(const struct cpumask *mask) * this function calls the 'stop' function on all other CPUs in the system. */ +asmlinkage void smp_reboot_interrupt(void) +{ + ack_APIC_irq(); + irq_enter(); + stop_this_cpu(NULL); + irq_exit(); +} + static void native_smp_send_stop(void) { unsigned long flags; + unsigned long wait; if (reboot_force) return; - smp_call_function(stop_this_cpu, NULL, 0); + /* + * Use an own vector here because smp_call_function + * does lots of things not suitable in a panic situation. + * On most systems we could also use an NMI here, + * but there are a few systems around where NMI + * is problematic so stay with an non NMI for now + * (this implies we cannot stop CPUs spinning with irq off + * currently) + */ + if (num_online_cpus() > 1) { + apic->send_IPI_allbutself(REBOOT_VECTOR); + + /* Don't wait longer than a second */ + wait = USEC_PER_SEC; + while (num_online_cpus() > 1 && wait--) + udelay(1); + } + local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); -- cgit v1.2.2 From 9ff36ee9668ff41ec3274597c730524645929b0f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:58 +0200 Subject: x86, mce: rename mce_notify_user to mce_notify_irq Rename the mce_notify_user function to mce_notify_irq. The next patch will split the wakeup handling of interrupt context and of process context and it's better to give it a clearer name for this. Contains a fix from Ying Huang [ Impact: cleanup ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Cc: Huang Ying Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 2 +- arch/x86/kernel/cpu/mcheck/mce-inject.c | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 10 +++++----- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 +- arch/x86/kernel/signal.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index afd3cdf6f8ad..713926b62cbb 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -159,7 +159,7 @@ enum mcp_flags { }; void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); -int mce_notify_user(void); +int mce_notify_irq(void); DECLARE_PER_CPU(struct mce, injectm); extern struct file_operations mce_chrdev_ops; diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 7d858fb4ce67..a3a235a53f09 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -65,7 +65,7 @@ static void raise_mce(unsigned long data) memset(&b, 0xff, sizeof(mce_banks_t)); printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); machine_check_poll(0, &b); - mce_notify_user(); + mce_notify_irq(); printk(KERN_INFO "Finished machine check poll on CPU %d\n", cpu); } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index f051a7807ab4..13e1b7ffe73a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -348,7 +348,7 @@ asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) ack_APIC_irq(); exit_idle(); irq_enter(); - mce_notify_user(); + mce_notify_irq(); irq_exit(); } #endif @@ -356,7 +356,7 @@ asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) static void mce_report_event(struct pt_regs *regs) { if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { - mce_notify_user(); + mce_notify_irq(); return; } @@ -968,7 +968,7 @@ static void mcheck_timer(unsigned long data) * polling interval, otherwise increase the polling interval. */ n = &__get_cpu_var(next_interval); - if (mce_notify_user()) + if (mce_notify_irq()) *n = max(*n/2, HZ/100); else *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); @@ -989,7 +989,7 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger); * Can be called from interrupt context, but not from machine check/NMI * context. */ -int mce_notify_user(void) +int mce_notify_irq(void) { /* Not more than two messages every minute */ static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); @@ -1014,7 +1014,7 @@ int mce_notify_user(void) } return 0; } -EXPORT_SYMBOL_GPL(mce_notify_user); +EXPORT_SYMBOL_GPL(mce_notify_irq); /* * Initialize Machine Checks for a CPU. diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index eff3740501a3..b7c5a2470b40 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -80,7 +80,7 @@ static int cmci_supported(int *banks) static void intel_threshold_interrupt(void) { machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); - mce_notify_user(); + mce_notify_irq(); } static void print_update(char *type, int *hdr, int num) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index d0851e3f77eb..d5dc15bce005 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -860,7 +860,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) #ifdef CONFIG_X86_NEW_MCE /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) - mce_notify_user(); + mce_notify_irq(); #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ /* deal with pending signal delivery */ -- cgit v1.2.2 From 8fa8dd9e3aafb7b440b7d54219891615abc6390e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:58 +0200 Subject: x86, mce: define MCE_VECTOR Add MCE_VECTOR for the #MC exception. Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/irq_vectors.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 28477e4f2d49..1b35c4357ea8 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -25,6 +25,7 @@ */ #define NMI_VECTOR 0x02 +#define MCE_VECTOR 0x12 /* * IDT vectors usable for external interrupt sources start -- cgit v1.2.2 From 9b1beaf2b551a8a1604f104025b24e9c535c8963 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:59 +0200 Subject: x86, mce: support action-optional machine checks Newer Intel CPUs support a new class of machine checks called recoverable action optional. Action Optional means that the CPU detected some form of corruption in the background and tells the OS about using a machine check exception. The OS can then take appropiate action, like killing the process with the corrupted data or logging the event properly to disk. This is done by the new generic high level memory failure handler added in a earlier patch. The high level handler takes the address with the failed memory and does the appropiate action, like killing the process. In this version of the patch the high level handler is stubbed out with a weak function to not create a direct dependency on the hwpoison branch. The high level handler cannot be directly called from the machine check exception though, because it has to run in a defined process context to be able to sleep when taking VM locks (it is not expected to sleep for a long time, just do so in some exceptional cases like lock contention) Thus the MCE handler has to queue a work item for process context, trigger process context and then call the high level handler from there. This patch adds two path to process context: through a per thread kernel exit notify_user() callback or through a high priority work item. The first runs when the process exits back to user space, the other when it goes to sleep and there is no higher priority process. The machine check handler will schedule both, and whoever runs first will grab the event. This is done because quick reaction to this event is critical to avoid a potential more fatal machine check when the corruption is consumed. There is a simple lock less ring buffer to queue the corrupted addresses between the exception handler and the process context handler. Then in process context it just calls the high level VM code with the corrupted PFNs. The code adds the required code to extract the failed address from the CPU's machine check registers. It doesn't try to handle all possible cases -- the specification has 6 different ways to specify memory address -- but only the linear address. Most of the required checking has been already done earlier in the mce_severity rule checking engine. Following the Intel recommendations Action Optional errors are only enabled for known situations (encoded in MCACODs). The errors are ignored otherwise, because they are action optional. v2: Improve comment, disable preemption while processing ring buffer (reported by Ying Huang) Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 133 +++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/signal.c | 2 +- 3 files changed, 135 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 713926b62cbb..82978ad12072 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -160,6 +160,7 @@ enum mcp_flags { void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); int mce_notify_irq(void); +void mce_notify_process(void); DECLARE_PER_CPU(struct mce, injectm); extern struct file_operations mce_chrdev_ops; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 13e1b7ffe73a..d4e7b5947a0e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -105,6 +106,8 @@ static inline int skip_bank_init(int i) return i < BITS_PER_LONG && test_bit(i, &dont_init_banks); } +static DEFINE_PER_CPU(struct work_struct, mce_work); + /* Do initial initialization of a struct mce */ void mce_setup(struct mce *m) { @@ -312,6 +315,61 @@ static void mce_wrmsrl(u32 msr, u64 v) wrmsrl(msr, v); } +/* + * Simple lockless ring to communicate PFNs from the exception handler with the + * process context work function. This is vastly simplified because there's + * only a single reader and a single writer. + */ +#define MCE_RING_SIZE 16 /* we use one entry less */ + +struct mce_ring { + unsigned short start; + unsigned short end; + unsigned long ring[MCE_RING_SIZE]; +}; +static DEFINE_PER_CPU(struct mce_ring, mce_ring); + +/* Runs with CPU affinity in workqueue */ +static int mce_ring_empty(void) +{ + struct mce_ring *r = &__get_cpu_var(mce_ring); + + return r->start == r->end; +} + +static int mce_ring_get(unsigned long *pfn) +{ + struct mce_ring *r; + int ret = 0; + + *pfn = 0; + get_cpu(); + r = &__get_cpu_var(mce_ring); + if (r->start == r->end) + goto out; + *pfn = r->ring[r->start]; + r->start = (r->start + 1) % MCE_RING_SIZE; + ret = 1; +out: + put_cpu(); + return ret; +} + +/* Always runs in MCE context with preempt off */ +static int mce_ring_add(unsigned long pfn) +{ + struct mce_ring *r = &__get_cpu_var(mce_ring); + unsigned next; + + next = (r->end + 1) % MCE_RING_SIZE; + if (next == r->start) + return -1; + r->ring[r->end] = pfn; + wmb(); + r->end = next; + return 0; +} + int mce_available(struct cpuinfo_x86 *c) { if (mce_disabled) @@ -319,6 +377,15 @@ int mce_available(struct cpuinfo_x86 *c) return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } +static void mce_schedule_work(void) +{ + if (!mce_ring_empty()) { + struct work_struct *work = &__get_cpu_var(mce_work); + if (!work_pending(work)) + schedule_work(work); + } +} + /* * Get the address of the instruction at the time of the machine check * error. @@ -349,6 +416,7 @@ asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) exit_idle(); irq_enter(); mce_notify_irq(); + mce_schedule_work(); irq_exit(); } #endif @@ -357,6 +425,13 @@ static void mce_report_event(struct pt_regs *regs) { if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { mce_notify_irq(); + /* + * Triggering the work queue here is just an insurance + * policy in case the syscall exit notify handler + * doesn't run soon enough or ends up running on the + * wrong CPU (can happen when audit sleeps) + */ + mce_schedule_work(); return; } @@ -731,6 +806,23 @@ reset: return ret; } +/* + * Check if the address reported by the CPU is in a format we can parse. + * It would be possible to add code for most other cases, but all would + * be somewhat complicated (e.g. segment offset would require an instruction + * parser). So only support physical addresses upto page granuality for now. + */ +static int mce_usable_address(struct mce *m) +{ + if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) + return 0; + if ((m->misc & 0x3f) > PAGE_SHIFT) + return 0; + if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS) + return 0; + return 1; +} + static void mce_clear_state(unsigned long *toclear) { int i; @@ -865,6 +957,16 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (m.status & MCI_STATUS_ADDRV) m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); + /* + * Action optional error. Queue address for later processing. + * When the ring overflows we just ignore the AO error. + * RED-PEN add some logging mechanism when + * usable_address or mce_add_ring fails. + * RED-PEN don't ignore overflow for tolerant == 0 + */ + if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) + mce_ring_add(m.addr >> PAGE_SHIFT); + mce_get_rip(&m, regs); mce_log(&m); @@ -916,6 +1018,36 @@ out: } EXPORT_SYMBOL_GPL(do_machine_check); +/* dummy to break dependency. actual code is in mm/memory-failure.c */ +void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) +{ + printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); +} + +/* + * Called after mce notification in process context. This code + * is allowed to sleep. Call the high level VM handler to process + * any corrupted pages. + * Assume that the work queue code only calls this one at a time + * per CPU. + * Note we don't disable preemption, so this code might run on the wrong + * CPU. In this case the event is picked up by the scheduled work queue. + * This is merely a fast path to expedite processing in some common + * cases. + */ +void mce_notify_process(void) +{ + unsigned long pfn; + mce_notify_irq(); + while (mce_ring_get(&pfn)) + memory_failure(pfn, MCE_VECTOR); +} + +static void mce_process_work(struct work_struct *dummy) +{ + mce_notify_process(); +} + #ifdef CONFIG_X86_MCE_INTEL /*** * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog @@ -1204,6 +1336,7 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) mce_init(); mce_cpu_features(c); mce_init_timer(); + INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); } /* diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index d5dc15bce005..4976888094f0 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -860,7 +860,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) #ifdef CONFIG_X86_NEW_MCE /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) - mce_notify_irq(); + mce_notify_process(); #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ /* deal with pending signal delivery */ -- cgit v1.2.2 From 8051dbd2dfd1427cc102888d7d96bf39de0be150 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 2 Jun 2009 16:53:23 +0900 Subject: x86, mce: fix for mce counters Make the MCE counters work on 32bit and add poll count in arch_irq_stat_cpu. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/irq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index eff46b5de62f..9773395aa758 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -95,7 +95,7 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, " Threshold APIC interrupts\n"); # endif #endif -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) +#ifdef CONFIG_X86_NEW_MCE seq_printf(p, "%*s: ", prec, "MCE"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); @@ -172,9 +172,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) { u64 sum = irq_stats(cpu)->__nmi_count; -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) - sum += per_cpu(mce_exception_count, cpu); -#endif #ifdef CONFIG_X86_LOCAL_APIC sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; @@ -191,6 +188,10 @@ u64 arch_irq_stat_cpu(unsigned int cpu) # ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; # endif +#endif +#ifdef CONFIG_X86_NEW_MCE + sum += per_cpu(mce_exception_count, cpu); + sum += per_cpu(mce_poll_count, cpu); #endif return sum; } -- cgit v1.2.2 From a89ab11454b476d2d9e8a10f903360a62e21bac8 Mon Sep 17 00:00:00 2001 From: Peter Ma Date: Mon, 8 Jun 2009 12:54:02 +0200 Subject: avr32: Change Atmel ATNGW100 config to add choice of add-on board Signed-off-by: Peter Ma Signed-off-by: Haavard Skinnemoen --- arch/avr32/boards/atngw100/Kconfig | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/avr32/boards/atngw100/Kconfig b/arch/avr32/boards/atngw100/Kconfig index b3f99477bbeb..f41c5335e6a9 100644 --- a/arch/avr32/boards/atngw100/Kconfig +++ b/arch/avr32/boards/atngw100/Kconfig @@ -2,8 +2,15 @@ if BOARD_ATNGW100 +choice + prompt "Select an NGW100 add-on board to support" + default BOARD_ATNGW100_ADDON_NONE + +config BOARD_ATNGW100_ADDON_NONE + bool "None" + config BOARD_ATNGW100_EVKLCD10X - bool "Add support for EVKLCD10X addon board" + bool "EVKLCD10X addon board" help This enables support for the EVKLCD100 (QVGA) or EVKLCD101 (VGA) addon board for the NGW100. By enabling this the LCD controller and @@ -14,7 +21,7 @@ config BOARD_ATNGW100_EVKLCD10X The MCI pins can be reenabled by editing the "add device function" but this may break the setup for other displays that use these pins. - Choose 'Y' here if you have a EVKLCD100/101 connected to the NGW100. +endchoice choice prompt "LCD panel resolution on EVKLCD10X" -- cgit v1.2.2 From 4024533e60787a5507818b0c0fdb44ddb522cdf5 Mon Sep 17 00:00:00 2001 From: Peter Ma Date: Mon, 8 Jun 2009 12:55:35 +0200 Subject: avr32: Add support for Mediama RMTx add-on board for ATNGW100 Signed-off-by: Peter Ma Signed-off-by: Haavard Skinnemoen --- arch/avr32/boards/atngw100/Kconfig | 16 + arch/avr32/boards/atngw100/Kconfig_mrmt | 80 ++ arch/avr32/boards/atngw100/Makefile | 1 + arch/avr32/boards/atngw100/mrmt.c | 373 ++++++++ arch/avr32/boards/atngw100/setup.c | 5 + arch/avr32/configs/atngw100_mrmt_defconfig | 1363 ++++++++++++++++++++++++++++ 6 files changed, 1838 insertions(+) create mode 100644 arch/avr32/boards/atngw100/Kconfig_mrmt create mode 100644 arch/avr32/boards/atngw100/mrmt.c create mode 100644 arch/avr32/configs/atngw100_mrmt_defconfig (limited to 'arch') diff --git a/arch/avr32/boards/atngw100/Kconfig b/arch/avr32/boards/atngw100/Kconfig index f41c5335e6a9..be27a0218ab4 100644 --- a/arch/avr32/boards/atngw100/Kconfig +++ b/arch/avr32/boards/atngw100/Kconfig @@ -21,6 +21,18 @@ config BOARD_ATNGW100_EVKLCD10X The MCI pins can be reenabled by editing the "add device function" but this may break the setup for other displays that use these pins. +config BOARD_ATNGW100_MRMT + bool "Mediama RMT1/2 add-on board" + help + This enables support for the Mediama RMT1 or RMT2 board. + RMT provides LCD support, AC97 codec and other + optional peripherals to the Atmel NGW100. + + This choice disables the detect pin and the write-protect pin for the + MCI platform device, since it conflicts with the LCD platform device. + The MCI pins can be reenabled by editing the "add device function" but + this may break the setup for other displays that use these pins. + endchoice choice @@ -39,4 +51,8 @@ config BOARD_ATNGW100_EVKLCD10X_POW_QVGA endchoice +if BOARD_ATNGW100_MRMT +source "arch/avr32/boards/atngw100/Kconfig_mrmt" +endif + endif # BOARD_ATNGW100 diff --git a/arch/avr32/boards/atngw100/Kconfig_mrmt b/arch/avr32/boards/atngw100/Kconfig_mrmt new file mode 100644 index 000000000000..9a199a207f3c --- /dev/null +++ b/arch/avr32/boards/atngw100/Kconfig_mrmt @@ -0,0 +1,80 @@ +# RMT for NGW100 customization + +choice + prompt "RMT Version" + help + Select the RMTx board version. + +config BOARD_MRMT_REV1 + bool "RMT1" +config BOARD_MRMT_REV2 + bool "RMT2" + +endchoice + +config BOARD_MRMT_AC97 + bool "Enable AC97 CODEC" + help + Enable the UCB1400 AC97 CODEC driver. + +choice + prompt "Touchscreen Driver" + default BOARD_MRMT_ADS7846_TS + +config BOARD_MRMT_UCB1400_TS + bool "Use UCB1400 Touchscreen" + +config BOARD_MRMT_ADS7846_TS + bool "Use ADS7846 Touchscreen" + +endchoice + +choice + prompt "RMTx LCD Selection" + default BOARD_MRMT_LCD_DISABLE + +config BOARD_MRMT_LCD_DISABLE + bool "LCD Disabled" + +config BOARD_MRMT_LCD_LQ043T3DX0X + bool "Sharp LQ043T3DX0x or compatible" + help + If using RMT2, be sure to load the resistor pack selectors accordingly + +if BOARD_MRMT_REV2 +config BOARD_MRMT_LCD_KWH043GM08 + bool "Formike KWH043GM08 or compatible" + help + Be sure to load the RMT2 resistor pack selectors accordingly +endif + +endchoice + +if !BOARD_MRMT_LCD_DISABLE +config BOARD_MRMT_BL_PWM + bool "Use PWM control for LCD Backlight" + help + Use PWM driver for controlling LCD Backlight. + Otherwise, LCD Backlight is always on. +endif + +config BOARD_MRMT_RTC_I2C + bool "Use External RTC on I2C Bus" + help + RMT1 has an optional RTC device on the I2C bus. + It is a SII S35390A. Be sure to select the + matching RTC driver. + +choice + prompt "Wireless Module on ttyS2" + default BOARD_MRMT_WIRELESS_ZB + +config BOARD_MRMT_WIRELESS_ZB + bool "Use ZigBee/802.15.4 Module" + +config BOARD_MRMT_WIRELESS_BT + bool "Use Bluetooth (HCI) Module" + +config BOARD_MRMT_WIRELESS_NONE + bool "Not Installed" +endchoice diff --git a/arch/avr32/boards/atngw100/Makefile b/arch/avr32/boards/atngw100/Makefile index 6376f5322e4d..f4ebe42a8254 100644 --- a/arch/avr32/boards/atngw100/Makefile +++ b/arch/avr32/boards/atngw100/Makefile @@ -1,2 +1,3 @@ obj-y += setup.o flash.o obj-$(CONFIG_BOARD_ATNGW100_EVKLCD10X) += evklcd10x.o +obj-$(CONFIG_BOARD_ATNGW100_MRMT) += mrmt.o diff --git a/arch/avr32/boards/atngw100/mrmt.c b/arch/avr32/boards/atngw100/mrmt.c new file mode 100644 index 000000000000..bf78e516a85f --- /dev/null +++ b/arch/avr32/boards/atngw100/mrmt.c @@ -0,0 +1,373 @@ +/* + * Board-specific setup code for Remote Media Terminal 1 (RMT1) + * add-on board for the ATNGW100 Network Gateway + * + * Copyright (C) 2008 Mediama Technologies + * Based on ATNGW100 Network Gateway (Copyright (C) Atmel) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include