From b6f42a4a3c886bd18baf319d433a841ac9942c02 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:31 -0700 Subject: x86/xsaves: Add a kernel parameter noxsaves to disable xsaves/xrstors This patch adds a kernel parameter noxsaves to disable xsaves/xrstors feature. The kernel will fall back to use xsaveopt and xrstor to save and restor xstates. By using this parameter, xsave area occupies more memory because standard form of xsave area in xsaveopt/xrstor occupies more memory than compacted form of xsave area. This patch adds a description of the kernel parameter noxsaveopt in doc. The code to support the parameter noxsaveopt has been in the kernel before. This patch just adds the description of this parameter in the doc. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-4-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e7c4b979d504..cdc95852532d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -146,6 +146,7 @@ static int __init x86_xsave_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + setup_clear_cpu_cap(X86_FEATURE_XSAVES); setup_clear_cpu_cap(X86_FEATURE_AVX); setup_clear_cpu_cap(X86_FEATURE_AVX2); return 1; @@ -159,6 +160,13 @@ static int __init x86_xsaveopt_setup(char *s) } __setup("noxsaveopt", x86_xsaveopt_setup); +static int __init x86_xsaves_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + return 1; +} +__setup("noxsaves", x86_xsaves_setup); + #ifdef CONFIG_X86_32 static int cachesize_override = -1; static int disable_x86_serial_nr = 1; -- cgit v1.2.2 From 5b3e83f46a2a7e8625258dbf84a26e7f4032bfa8 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:32 -0700 Subject: x86/alternative: Add alternative_input_2 to support alternative with two features and input alternative_input_2() replaces old instruction with new instructions with input based on two features. In alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, feature2, input...), feature2 has higher priority to replace oldinstr than feature1. If CPU has feature2, newinstr2 replaces oldinstr and newinstr2 is executed during run time. If CPU doesn't have feature2, but it has feature1, newinstr1 replaces oldinstr and newinstr1 is executed during run time. If CPU doesn't have feature2 and feature1, oldinstr is executed during run time. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-5-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 0a3f9c9f98d5..473bdbee378a 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -161,6 +161,20 @@ static inline int alternatives_text_reserved(void *start, void *end) asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ : : "i" (0), ## input) +/* + * This is similar to alternative_input. But it has two features and + * respective instructions. + * + * If CPU has feature2, newinstr2 is used. + * Otherwise, if CPU has feature1, newinstr1 is used. + * Otherwise, oldinstr is used. 
+ */ +#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \ + feature2, input...) \ + asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \ + newinstr2, feature2) \ + : : "i" (0), ## input) + /* Like alternative_input, but with a single output argument */ #define alternative_io(oldinstr, newinstr, feature, output, input...) \ asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ -- cgit v1.2.2 From 0b29643a58439dc9a8b0c0cacad0e7cb608c8199 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:33 -0700 Subject: x86/xsaves: Change compacted format xsave area header The XSAVE area header is changed to support both compacted format and standard format of xsave area. The XSAVE header of an xsave area comprises the 64 bytes starting at offset 512 from the area base address: - Bytes 7:0 of the xsave header is a state-component bitmap called xstate_bv. It identifies the state components in the xsave area. - Bytes 15:8 of the xsave header is a state-component bitmap called xcomp_bv. It is used as follows: - xcomp_bv[63] indicates the format of the extended region of the xsave area. If it is clear, the standard format is used. If it is set, the compacted format is used. - xcomp_bv[62:0] indicate which features (starting at feature 2) have space allocated for them in the compacted format. - Bytes 63:16 of the xsave header are reserved. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-6-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a4ea02351f4d..2c8d3b8e7fcb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -386,8 +386,8 @@ struct bndcsr_struct { struct xsave_hdr_struct { u64 xstate_bv; - u64 reserved1[2]; - u64 reserved2[5]; + u64 xcomp_bv; + u64 reserved[6]; } __attribute__((packed)); struct xsave_struct { -- cgit v1.2.2 From 200b08a970b2ae764b670a326088ab8bc0a989cc Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:34 -0700 Subject: x86/xsaves: Define macros for xsave instructions Define macros for xsave, xsaveopt, xsaves, xrstor, and xrstors inline instructions. The instructions will be used for saving and restoring xstate. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-7-git-send-email-fenghua.yu@intel.com Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/xsave.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index d949ef28c48b..71bdde45b519 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -52,6 +52,12 @@ extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern int init_fpu(struct task_struct *child); +#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" +#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" +#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" +#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" + static inline int fpu_xrstor_checking(struct xsave_struct *fx) { int err; -- cgit v1.2.2 From b84e70552e5aad71a1c14536e6ffcfe7934b73e4 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:35 -0700 Subject: x86/xsaves: Define a macro for handling xsave/xrstor instruction fault Define a macro to handle fault generated by xsave, xsaveopt, xsaves, xrstor, and xrstors instructions. It is used in functions like xsave_state() etc. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-8-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 71bdde45b519..76c2459188c8 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -58,6 +58,13 @@ extern int init_fpu(struct task_struct *child); #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" +#define xstate_fault ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err) + static inline int fpu_xrstor_checking(struct xsave_struct *fx) { int err; -- cgit v1.2.2 From f31a9f7c71691569359fa7fb8b0acaa44bce0324 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:36 -0700 Subject: x86/xsaves: Use xsaves/xrstors to save and restore xsave area If xsaves is eanbled, use xsaves/xrstors instrucitons to save and restore xstate. xsaves and xrstors support compacted format, init optimization, modified optimization, and supervisor states. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-9-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 84 +++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 76c2459188c8..f9177a2a97e9 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -65,6 +65,70 @@ extern int init_fpu(struct task_struct *child); _ASM_EXTABLE(1b, 3b) \ : [err] "=r" (err) +/* + * Save processor xstate to xsave area. + */ +static inline int xsave_state(struct xsave_struct *fx, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + /* + * If xsaves is enabled, xsaves replaces xsaveopt because + * it supports compact format and supervisor states in addition to + * modified optimization in xsaveopt. + * + * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave + * because xsaveopt supports modified optimization which is not + * supported by xsave. 
+ * + * If none of xsaves and xsaveopt is enabled, use xsave. + */ + alternative_input_2( + "1:"XSAVE, + "1:"XSAVEOPT, + X86_FEATURE_XSAVEOPT, + "1:"XSAVES, + X86_FEATURE_XSAVES, + [fx] "D" (fx), "a" (lmask), "d" (hmask) : + "memory"); + asm volatile("2:\n\t" + xstate_fault + : "0" (0) + : "memory"); + + return err; +} + +/* + * Restore processor xstate from xsave area. + */ +static inline int xrstor_state(struct xsave_struct *fx, u64 mask) +{ + int err = 0; + u32 lmask = mask; + u32 hmask = mask >> 32; + + /* + * Use xrstors to restore context if it is enabled. xrstors supports + * compacted format of xsave area which is not supported by xrstor. + */ + alternative_input( + "1: " XRSTOR, + "1: " XRSTORS, + X86_FEATURE_XSAVES, + "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + + asm volatile("2:\n" + xstate_fault + : "0" (0) + : "memory"); + + return err; +} + static inline int fpu_xrstor_checking(struct xsave_struct *fx) { int err; @@ -130,26 +194,6 @@ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) return err; } -static inline void xrstor_state(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - - asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); -} - -static inline void xsave_state(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - - asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); -} - static inline void fpu_xsave(struct fpu *fpu) { /* This, however, we can work around by forcing the compiler to select -- cgit v1.2.2 From f9de314b340f4816671f037e79ed01f685ac9787 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:37 -0700 Subject: x86/xsaves: Use xsaves/xrstors for context switch If xsaves is eanbled, use xsaves/xrstors for context switch to support compacted format xsave area to occupy less memory and modified optimization to improve saving performance. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-10-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index f9177a2a97e9..8b75824e41dd 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -129,22 +129,20 @@ static inline int xrstor_state(struct xsave_struct *fx, u64 mask) return err; } -static inline int fpu_xrstor_checking(struct xsave_struct *fx) +/* + * Save xstate context for old process during context switch. + */ +static inline void fpu_xsave(struct fpu *fpu) { - int err; - - asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0) - : "memory"); + xsave_state(&fpu->state->xsave, -1); +} - return err; +/* + * Restore xstate context for new process during context switch. 
+ */ +static inline int fpu_xrstor_checking(struct xsave_struct *fx) +{ + return xrstor_state(fx, -1); } static inline int xsave_user(struct xsave_struct __user *buf) @@ -194,15 +192,4 @@ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) return err; } -static inline void fpu_xsave(struct fpu *fpu) -{ - /* This, however, we can work around by forcing the compiler to select - an addressing mode that doesn't require extended registers. */ - alternative_input( - ".byte " REX_PREFIX "0x0f,0xae,0x27", - ".byte " REX_PREFIX "0x0f,0xae,0x37", - X86_FEATURE_XSAVEOPT, - [fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) : - "memory"); -} #endif -- cgit v1.2.2 From facbf4d91ae64f84ef93a00e4037135cd9f4b2ab Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:38 -0700 Subject: x86/xsaves: Use xsave/xrstor for saving and restoring user space context We use legacy xsave/xrstor to save and restore standard form of xsave area in user space context. No xsaveopt or xsaves is used here for two reasons. First, we don't want to use modified optimization which is implemented in xsaveopt and xsaves because xrstor/xrstors might track a wrong user space application. Secondly, we don't use compacted format of xsave area for backward compatibility because legacy user space applications only don't understand the compacted format of the xsave area. Using standard form of the xsave area may allocate more memory for user context than compacted form, but preserves compatibility with legacy applications. Furthermore, even with holes, the relevant cache lines don't get touched and thus the performance impact is limited. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-11-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 8b75824e41dd..0d1523146545 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -145,6 +145,16 @@ static inline int fpu_xrstor_checking(struct xsave_struct *fx) return xrstor_state(fx, -1); } +/* + * Save xstate to user space xsave area. + * + * We don't use modified optimization because xrstor/xrstors might track + * a different application. + * + * We don't use compacted format xsave area for + * backward compatibility for old applications which don't understand + * compacted format of xsave area. + */ static inline int xsave_user(struct xsave_struct __user *buf) { int err; @@ -158,35 +168,28 @@ static inline int xsave_user(struct xsave_struct __user *buf) return -EFAULT; __asm__ __volatile__(ASM_STAC "\n" - "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" + "1:"XSAVE"\n" "2: " ASM_CLAC "\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b,3b) - : [err] "=r" (err) + xstate_fault : "D" (buf), "a" (-1), "d" (-1), "0" (0) : "memory"); return err; } +/* + * Restore xstate from user space xsave area. 
+ */ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) { - int err; + int err = 0; struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); u32 lmask = mask; u32 hmask = mask >> 32; __asm__ __volatile__(ASM_STAC "\n" - "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" + "1:"XRSTOR"\n" "2: " ASM_CLAC "\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b,3b) - : [err] "=r" (err) + xstate_fault : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) : "memory"); /* memory required? */ return err; -- cgit v1.2.2 From 21e726c4a3625a1038e97795b7aad97109ba7e19 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:39 -0700 Subject: x86/xsaves: Clear reserved bits in xsave header The reserved bits (128~511) in the xsave header must be zero according to X86 SDM. Clear the bits in this patch. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-12-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/i387.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index d5dd80814419..a9a4229f6161 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -375,7 +375,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, /* * These bits must be zero. */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + memset(xsave_hdr->reserved, 0, 48); return ret; } -- cgit v1.2.2 From adb9d526e98268b647a74726346e1c40e6a37d2e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:40 -0700 Subject: x86/xsaves: Add xsaves and xrstors support for booting time Since boot_cpu_data and cpu capabilities are not enabled yet during early booting time, alternative can not be used in some functions to access xsave area. Therefore, we define two new functions xrstor_state_booting() and xsave_state_booting() to access xsave area just during early booting time. xrstor_state_booting restores xstate from xsave area during early booting time. xsave_state_booting saves xstate to xsave area during early booting time. The two functions are similar to xrstor_state and xsave_state respectively. But the two functions don't use alternatives because alternatives are not enabled when they are called in such early booting time. xrstor_state_booting is called only by functions defined as __init. So it's defined as __init and will be removed from memory after booting time. There is no extra memory cost caused by this function during running time. But because xsave_state_booting can be called by run-time function __save_fpu(), it's not defined as __init and will stay in memory during running time although it will not be called anymore during running time. It is not ideal to have this function stay in memory during running time. But it's a pretty small function and the memory cost will be small. By doing in this way, we can avoid to change a lot of code to just remove this small function and save a bit memory for running time. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-13-git-send-email-fenghua.yu@intel.com Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/xsave.h | 60 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 0d1523146545..aa3ff0cca9a1 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -65,6 +65,66 @@ extern int init_fpu(struct task_struct *child); _ASM_EXTABLE(1b, 3b) \ : [err] "=r" (err) +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static int xsave_state_booting(struct xsave_struct *fx, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XSAVES"\n\t" + "2:\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + else + asm volatile("1:"XSAVE"\n\t" + "2:\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + + asm volatile(xstate_fault + : "0" (0) + : "memory"); + + return err; +} + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XRSTORS"\n\t" + "2:\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + else + asm volatile("1:"XRSTOR"\n\t" + "2:\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + + asm volatile(xstate_fault + : "0" (0) + : "memory"); + + return err; +} + /* * Save processor xstate to xsave area. */ -- cgit v1.2.2 From f41d830fa890044cb60f6bb39fc8f6493ffebb47 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:41 -0700 Subject: x86/xsaves: Save xstate to task's xsave area in __save_fpu during booting time __save_fpu() can be called during early booting time when cpu caps are not enabled and alternative can not be used yet. Therefore, it calls xsave_state_booting() during booting time to save xstate to task's xsave area. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-14-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fpu-internal.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index cea1c76d49bf..6099c0e5b62e 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -508,9 +508,12 @@ static inline void user_fpu_begin(void) static inline void __save_fpu(struct task_struct *tsk) { - if (use_xsave()) - xsave_state(&tsk->thread.fpu.state->xsave, -1); - else + if (use_xsave()) { + if (unlikely(system_state == SYSTEM_BOOTING)) + xsave_state_booting(&tsk->thread.fpu.state->xsave, -1); + else + xsave_state(&tsk->thread.fpu.state->xsave, -1); + } else fpu_fxsave(&tsk->thread.fpu); } -- cgit v1.2.2 From 47c2f292cc8669f70644a949cadd5fa5ee0e0e07 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:42 -0700 Subject: x86/xsaves: Call booting time xsaves and xrstors in setup_init_fpu_buf setup_init_fpu_buf() calls booting time xsaves and xrstors to save and restore xstate in xsave area. 
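The resulting boot-time flow in setup_init_fpu_buf() is roughly the sketch below (a simplified rendering of the hunk that follows, reusing the pcntxt_mask and init_xstate_buf globals already defined in xsave.c):

	/*
	 * When xsaves is available the init buffer uses the compacted
	 * format, so its header must be filled in first: xrstors looks at
	 * xcomp_bv (bit 63 = compacted format) to locate each component.
	 */
	if (cpu_has_xsaves) {
		init_xstate_buf->xsave_hdr.xcomp_bv = (u64)1 << 63 | pcntxt_mask;
		init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask;
	}
	xrstor_state_booting(init_xstate_buf, -1);	/* load the init state */
	xsave_state_booting(init_xstate_buf, -1);	/* dump it back out */
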
Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-15-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/xsave.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index a4b451c6addf..8fa7c7d4183d 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -496,15 +496,21 @@ static void __init setup_init_fpu_buf(void) setup_xstate_features(); + if (cpu_has_xsaves) { + init_xstate_buf->xsave_hdr.xcomp_bv = + (u64)1 << 63 | pcntxt_mask; + init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask; + } + /* * Init all the features state with header_bv being 0x0 */ - xrstor_state(init_xstate_buf, -1); + xrstor_state_booting(init_xstate_buf, -1); /* * Dump the init state again. This is to identify the init state * of any feature which is not represented by all zero's. */ - xsave_state(init_xstate_buf, -1); + xsave_state_booting(init_xstate_buf, -1); } static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; -- cgit v1.2.2 From 7e7ce87f6ad4e1730364e5e76628b43c5759b700 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:43 -0700 Subject: x86/xsaves: Enable xsaves/xrstors If xsaves/xrstors is enabled, compacted format of xsave area will be used and less memory may be used for context per process. And modified optimization implemented in xsaves/xrstors improves performance of saving xstate. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-16-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/xsave.c | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 8fa7c7d4183d..f930f8ab92d1 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -24,7 +25,9 @@ u64 pcntxt_mask; struct xsave_struct *init_xstate_buf; static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; -static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; +static unsigned int *xstate_offsets, *xstate_sizes; +static unsigned int *xstate_comp_offsets, *xstate_comp_sizes; +static unsigned int xstate_features; /* * If a processor implementation discern that a processor state component is @@ -283,7 +286,7 @@ sanitize_restored_xstate(struct task_struct *tsk, if (use_xsave()) { /* These bits must be zero. */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + memset(xsave_hdr->reserved, 0, 48); /* * Init the state that is not present in the memory @@ -526,6 +529,30 @@ static int __init eager_fpu_setup(char *s) } __setup("eagerfpu=", eager_fpu_setup); + +/* + * Calculate total size of enabled xstates in XCR0/pcntxt_mask. + */ +static void __init init_xstate_size(void) +{ + unsigned int eax, ebx, ecx, edx; + int i; + + if (!cpu_has_xsaves) { + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + xstate_size = ebx; + return; + } + + xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + for (i = 2; i < 64; i++) { + if (test_bit(i, (unsigned long *)&pcntxt_mask)) { + cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); + xstate_size += eax; + } + } +} + /* * Enable and initialize the xsave feature. 
*/ @@ -557,8 +584,7 @@ static void __init xstate_enable_boot_cpu(void) /* * Recompute the context size for enabled features */ - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; + init_xstate_size(); update_regset_xstate_info(xstate_size, pcntxt_mask); prepare_fx_sw_frame(); @@ -578,8 +604,9 @@ static void __init xstate_enable_boot_cpu(void) } } - pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", - pcntxt_mask, xstate_size); + pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n", + pcntxt_mask, xstate_size, + cpu_has_xsaves ? "compacted form" : "standard form"); } /* -- cgit v1.2.2 From 7496d6458fe3219d63848ce4a9afbd86245cab22 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 May 2014 11:12:44 -0700 Subject: Define kernel API to get address of each state in xsave area In standard form, each state is saved in the xsave area in fixed offset. But in compacted form, offset of each saved state only can be calculated during run time because some xstates may not be enabled and saved. We define kernel API get_xsave_addr() returns address of a given state saved in a xsave area. It can be called in kernel to get address of each xstate in xsave area in either standard format or compacted format. It's useful when kernel wants to directly access each state in xsave area. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-17-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 3 +++ arch/x86/kernel/process.c | 1 + arch/x86/kernel/xsave.c | 64 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index aa3ff0cca9a1..1ba577c670ad 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -255,4 +255,7 @@ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) return err; } +void *get_xsave_addr(struct xsave_struct *xsave, int xstate); +void setup_xstate_comp(void); + #endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4505e2a950d8..f804dc935d2a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -93,6 +93,7 @@ void arch_task_cache_init(void) kmem_cache_create("task_xstate", xstate_size, __alignof__(union thread_xstate), SLAB_PANIC | SLAB_NOTRACK, NULL); + setup_xstate_comp(); } /* diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index f930f8ab92d1..a6cb8233f628 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -481,6 +481,47 @@ static void __init setup_xstate_features(void) } while (1); } +/* + * This function sets up offsets and sizes of all extended states in + * xsave area. This supports both standard format and compacted format + * of the xsave aread. 
+ * + * Input: void + * Output: void + */ +void setup_xstate_comp(void) +{ + int i; + + xstate_comp_offsets = kmalloc(xstate_features * sizeof(int), + GFP_KERNEL); + xstate_comp_sizes = kmalloc(xstate_features * sizeof(int), GFP_KERNEL); + + if (!cpu_has_xsaves) { + for (i = 2; i < xstate_features; i++) { + if (test_bit(i, (unsigned long *)&pcntxt_mask)) { + xstate_comp_offsets[i] = xstate_offsets[i]; + xstate_comp_sizes[i] = xstate_sizes[i]; + } + } + return; + } + + xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; + + for (i = 2; i < xstate_features; i++) { + if (test_bit(i, (unsigned long *)&pcntxt_mask)) + xstate_comp_sizes[i] = xstate_sizes[i]; + else + xstate_comp_sizes[i] = 0; + + if (i > 2) + xstate_comp_offsets[i] = xstate_comp_offsets[i-1] + + xstate_comp_sizes[i-1]; + + } +} + /* * setup the xstate image representing the init state */ @@ -668,3 +709,26 @@ void eager_fpu_init(void) else fxrstor_checking(&init_xstate_buf->i387); } + +/* + * Given the xsave area and a state inside, this function returns the + * address of the state. + * + * This is the API that is called to get xstate address in either + * standard format or compacted format of xsave area. + * + * Inputs: + * xsave: base address of the xsave area; + * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE, + * etc.) + * Output: + * address of the state in the xsave area. + */ +void *get_xsave_addr(struct xsave_struct *xsave, int xstate) +{ + int feature = fls64(xstate) - 1; + if (!test_bit(feature, (unsigned long *)&pcntxt_mask)) + return NULL; + + return (void *)xsave + xstate_comp_offsets[feature]; +} -- cgit v1.2.2 From c9e5a5a7034146493386d985ff432aed8059929a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 30 May 2014 08:19:21 -0700 Subject: x86/xsave: Make it clear that the XSAVE macros use (%edi)/(%rdi) The XSAVE instruction family takes a memory argment. The macros use (%edi)/(%rdi) as that memory argument - make that clear to the reader. Signed-off-by: H. Peter Anvin Cc: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-7-git-send-email-fenghua.yu@intel.com --- arch/x86/include/asm/xsave.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 1ba577c670ad..bbebd6e0a9ce 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -52,6 +52,7 @@ extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern int init_fpu(struct task_struct *child); +/* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" #define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" -- cgit v1.2.2 From 8ff925e10f2c72680918b95173ef4f8bb982d59e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 30 May 2014 14:59:24 -0700 Subject: x86/xsaves: Clean up code in xstate offsets computation in xsave area This patch cleans up some code in xstate offsets computation in xsave area: 1. It changes xstate_comp_offsets as an array. This avoids possible NULL pointer caused by possible kmalloc() failure during boot time. 2. It changes the global variable xstate_comp_sizes to a local variable because it is used only in setup_xstate_comp(). 3. It adds missing offsets for FP and SSE in xsave area. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1401387164-43416-17-git-send-email-fenghua.yu@intel.com Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/xsave.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index a6cb8233f628..940b142cc11f 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -26,7 +26,7 @@ struct xsave_struct *init_xstate_buf; static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static unsigned int *xstate_offsets, *xstate_sizes; -static unsigned int *xstate_comp_offsets, *xstate_comp_sizes; +static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8]; static unsigned int xstate_features; /* @@ -491,11 +491,16 @@ static void __init setup_xstate_features(void) */ void setup_xstate_comp(void) { + unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8]; int i; - xstate_comp_offsets = kmalloc(xstate_features * sizeof(int), - GFP_KERNEL); - xstate_comp_sizes = kmalloc(xstate_features * sizeof(int), GFP_KERNEL); + /* + * The FP xstates and SSE xstates are legacy states. They are always + * in the fixed offsets in the xsave area in either compacted form + * or standard form. + */ + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space); if (!cpu_has_xsaves) { for (i = 2; i < xstate_features; i++) { -- cgit v1.2.2 From d0f2dd186133a0241a2ccefb188a0e49e8187859 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 Jun 2014 12:21:08 +0200 Subject: x86, xsave: Add forgotten inline annotation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a missing inline annotation on a static function, in order to shut up a bunch of warnings like: In file included from arch/x86/crypto/camellia_aesni_avx_glue.c:23:0: ./arch/x86/include/asm/xsave.h:73:12: warning: ‘xsave_state_booting’ defined but not used [-Wunused-function] static int xsave_state_booting(struct xsave_struct *fx, u64 mask) ^ In file included from arch/x86/crypto/camellia_aesni_avx2_glue.c:23:0: ./arch/x86/include/asm/xsave.h:73:12: warning: ‘xsave_state_booting’ defined but not used [-Wunused-function] static int xsave_state_booting(struct xsave_struct *fx, u64 mask) ^ ... Cc: Fenghua Yu Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1403000468-30094-1-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index bbebd6e0a9ce..7e7a79ada658 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -70,7 +70,7 @@ extern int init_fpu(struct task_struct *child); * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static int xsave_state_booting(struct xsave_struct *fx, u64 mask) +static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; -- cgit v1.2.2 From b1bfd5ea451b60a4725907c282dec232c63f68bb Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:31 +0800 Subject: x86, mpparse: Use pr_lvl() helper utilities to replace printk(KERN_LVL) Use pr_lvl() helper utilities to replace printk(KERN_LVL) for readability, no function changes. Also use pr_cont() to avoid multiple newlines in one printk(). 
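A representative conversion, excerpted from the hunks below: a wrapped printk() that emitted two lines becomes a pr_err() plus a pr_cont() continuation (this is only one of the call sites touched):

	-	printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"
	-	       "type %x\n", *mpt);
	+	pr_err("Your mptable is wrong, contact your HW vendor!\n");
	+	pr_cont("type %x\n", *mpt);
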
Signed-off-by: Jiang Liu Acked-by: David Rientjes Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-3-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mpparse.c | 95 ++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 51 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d2b56489d70f..b10e1132f316 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -67,7 +67,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) boot_cpu_physical_apicid = m->apicid; } - printk(KERN_INFO "Processor #%d%s\n", m->apicid, bootup_cpu); + pr_info("Processor #%d%s\n", m->apicid, bootup_cpu); generic_processor_info(apicid, m->apicver); } @@ -87,9 +87,8 @@ static void __init MP_bus_info(struct mpc_bus *m) #if MAX_MP_BUSSES < 256 if (m->busid >= MAX_MP_BUSSES) { - printk(KERN_WARNING "MP table busid value (%d) for bustype %s " - " is too large, max. supported is %d\n", - m->busid, str, MAX_MP_BUSSES - 1); + pr_warn("MP table busid value (%d) for bustype %s is too large, max. supported is %d\n", + m->busid, str, MAX_MP_BUSSES - 1); return; } #endif @@ -110,7 +109,7 @@ static void __init MP_bus_info(struct mpc_bus *m) mp_bus_id_to_type[m->busid] = MP_BUS_EISA; #endif } else - printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); + pr_warn("Unknown bustype %s - ignoring\n", str); } static void __init MP_ioapic_info(struct mpc_ioapic *m) @@ -121,8 +120,8 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) { - apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", + apic_printk(APIC_VERBOSE, + "Int: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC INT %02x\n", mp_irq->irqtype, mp_irq->irqflag & 3, (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); @@ -135,8 +134,8 @@ static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {} static void __init MP_lintsrc_info(struct mpc_lintsrc *m) { - apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," - " IRQ %02x, APIC ID %x, APIC LINT %02x\n", + apic_printk(APIC_VERBOSE, + "Lint: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC LINT %02x\n", m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid, m->srcbusirq, m->destapic, m->destapiclint); } @@ -148,34 +147,33 @@ static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str) { if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) { - printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", + pr_err("MPTABLE: bad signature [%c%c%c%c]!\n", mpc->signature[0], mpc->signature[1], mpc->signature[2], mpc->signature[3]); return 0; } if (mpf_checksum((unsigned char *)mpc, mpc->length)) { - printk(KERN_ERR "MPTABLE: checksum error!\n"); + pr_err("MPTABLE: checksum error!\n"); return 0; } if (mpc->spec != 0x01 && mpc->spec != 0x04) { - printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", - mpc->spec); + pr_err("MPTABLE: bad table version (%d)!!\n", mpc->spec); return 0; } if (!mpc->lapic) { - printk(KERN_ERR "MPTABLE: null local APIC address!\n"); + pr_err("MPTABLE: null local APIC address!\n"); return 0; } memcpy(oem, mpc->oem, 8); 
oem[8] = 0; - printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); + pr_info("MPTABLE: OEM ID: %s\n", oem); memcpy(str, mpc->productid, 12); str[12] = 0; - printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); + pr_info("MPTABLE: Product ID: %s\n", str); - printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->lapic); + pr_info("MPTABLE: APIC at: 0x%X\n", mpc->lapic); return 1; } @@ -188,8 +186,8 @@ static void skip_entry(unsigned char **ptr, int *count, int size) static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt) { - printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n" - "type %x\n", *mpt); + pr_err("Your mptable is wrong, contact your HW vendor!\n"); + pr_cont("type %x\n", *mpt); print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, 1, mpc, mpc->length, 1); } @@ -259,7 +257,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) } if (!num_processors) - printk(KERN_ERR "MPTABLE: no processors registered!\n"); + pr_err("MPTABLE: no processors registered!\n"); return num_processors; } @@ -295,16 +293,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) * If it does, we assume it's valid. */ if (mpc_default_type == 5) { - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... " - "falling back to ELCR\n"); + pr_info("ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) - printk(KERN_ERR "ELCR contains invalid data... " - "not using ELCR\n"); + pr_err("ELCR contains invalid data... not using ELCR\n"); else { - printk(KERN_INFO - "Using ELCR to identify PCI interrupts\n"); + pr_info("Using ELCR to identify PCI interrupts\n"); ELCR_fallback = 1; } } @@ -353,7 +348,7 @@ static void __init construct_ioapic_table(int mpc_default_type) bus.busid = 0; switch (mpc_default_type) { default: - printk(KERN_ERR "???\nUnknown standard configuration %d\n", + pr_err("???\nUnknown standard configuration %d\n", mpc_default_type); /* fall through */ case 1: @@ -462,8 +457,8 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; #endif - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n" - "... disabling SMP support. (tell your hw vendor)\n"); + pr_err("BIOS bug, MP table errors detected!...\n"); + pr_cont("... disabling SMP support. (tell your hw vendor)\n"); early_iounmap(mpc, size); return -1; } @@ -481,8 +476,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) if (!mp_irq_entries) { struct mpc_bus bus; - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " - "using default mptable. (tell your hw vendor)\n"); + pr_err("BIOS bug, no explicit IRQ entries, using default mptable. 
(tell your hw vendor)\n"); bus.type = MP_BUS; bus.busid = 0; @@ -516,14 +510,14 @@ void __init default_get_smp_config(unsigned int early) if (acpi_lapic && acpi_ioapic) return; - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", - mpf->specification); + pr_info("Intel MultiProcessor Specification v1.%d\n", + mpf->specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) if (mpf->feature2 & (1 << 7)) { - printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); + pr_info(" IMCR and PIC compatibility mode.\n"); pic_mode = 1; } else { - printk(KERN_INFO " Virtual Wire compatibility mode.\n"); + pr_info(" Virtual Wire compatibility mode.\n"); pic_mode = 0; } #endif @@ -539,8 +533,7 @@ void __init default_get_smp_config(unsigned int early) return; } - printk(KERN_INFO "Default MP configuration #%d\n", - mpf->feature1); + pr_info("Default MP configuration #%d\n", mpf->feature1); construct_default_ISA_mptable(mpf->feature1); } else if (mpf->physptr) { @@ -550,7 +543,7 @@ void __init default_get_smp_config(unsigned int early) BUG(); if (!early) - printk(KERN_INFO "Processors: %d\n", num_processors); + pr_info("Processors: %d\n", num_processors); /* * Only use the first configuration found. */ @@ -583,10 +576,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) #endif mpf_found = mpf; - printk(KERN_INFO "found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n", - (unsigned long long) virt_to_phys(mpf), - (unsigned long long) virt_to_phys(mpf) + - sizeof(*mpf) - 1, mpf); + pr_info("found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n", + (unsigned long long) virt_to_phys(mpf), + (unsigned long long) virt_to_phys(mpf) + + sizeof(*mpf) - 1, mpf); mem = virt_to_phys(mpf); memblock_reserve(mem, sizeof(*mpf)); @@ -735,7 +728,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, int nr_m_spare = 0; unsigned char *mpt = ((unsigned char *)mpc) + count; - printk(KERN_INFO "mpc_length %x\n", mpc->length); + pr_info("mpc_length %x\n", mpc->length); while (count < mpc->length) { switch (*mpt) { case MP_PROCESSOR: @@ -862,13 +855,13 @@ static int __init update_mp_table(void) if (!smp_check_mpc(mpc, oem, str)) return 0; - printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf)); - printk(KERN_INFO "physptr: %x\n", mpf->physptr); + pr_info("mpf: %llx\n", (u64)virt_to_phys(mpf)); + pr_info("physptr: %x\n", mpf->physptr); if (mpc_new_phys && mpc->length > mpc_new_length) { mpc_new_phys = 0; - printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", - mpc_new_length); + pr_info("mpc_new_length is %ld, please use alloc_mptable=8k\n", + mpc_new_length); } if (!mpc_new_phys) { @@ -879,10 +872,10 @@ static int __init update_mp_table(void) mpc->checksum = 0xff; new = mpf_checksum((unsigned char *)mpc, mpc->length); if (old == new) { - printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); + pr_info("mpc is readonly, please try alloc_mptable instead\n"); return 0; } - printk(KERN_INFO "use in-position replacing\n"); + pr_info("use in-position replacing\n"); } else { mpf->physptr = mpc_new_phys; mpc_new = phys_to_virt(mpc_new_phys); @@ -892,7 +885,7 @@ static int __init update_mp_table(void) if (mpc_new_phys - mpf->physptr) { struct mpf_intel *mpf_new; /* steal 16 bytes from [0, 1k) */ - printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); + pr_info("mpf new: %x\n", 0x400 - 16); mpf_new = phys_to_virt(0x400 - 16); memcpy(mpf_new, mpf, 16); mpf = mpf_new; @@ -900,7 +893,7 @@ static int __init 
update_mp_table(void) } mpf->checksum = 0; mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); - printk(KERN_INFO "physptr new: %x\n", mpf->physptr); + pr_info("physptr new: %x\n", mpf->physptr); } /* -- cgit v1.2.2 From a491cc902ca495365e9cd45154b60d8c702d86da Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:32 +0800 Subject: x86, mpparse: Simplify arch/x86/include/asm/mpspec.h Simplify arch/x86/include/asm/mpspec.h by 1) Change max_physical_apicid to static as it's only used in apic.c. 2) Kill declaration of mpc_default_type, it's never defined. 3) Delete default_acpi_madt_oem_check(), it has already been declared in apic.h. 4) Make default_acpi_madt_oem_check() depends on CONFIG_X86_LOCAL_APIC instead of CONFIG_X86_64 to support i386. 5) Change mp_override_legacy_irq(), mp_config_acpi_legacy_irqs() and mp_register_gsi() as static because they are only used in acpi/boot.c. Signed-off-by: Jiang Liu Acked-by: David Rientjes Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Cc: Seiji Aguchi Cc: HATAYAMA Daisuke Cc: Paul Gortmaker Cc: Richard Weinberger Cc: Andi Kleen Link: http://lkml.kernel.org/r/1402302011-23642-4-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/apic.h | 4 ++-- arch/x86/include/asm/io_apic.h | 3 --- arch/x86/include/asm/mpspec.h | 13 ------------- arch/x86/kernel/acpi/boot.c | 12 +++++++++--- arch/x86/kernel/apic/apic.c | 2 +- 5 files changed, 12 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 19b0ebafcd3e..69ed79aa9085 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -502,8 +502,6 @@ static inline unsigned default_get_apic_id(unsigned long x) #define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 #ifdef CONFIG_X86_64 -extern int default_acpi_madt_oem_check(char *, char *); - extern void apic_send_IPI_self(int vector); DECLARE_PER_CPU(int, x2apic_extra_bits); @@ -552,6 +550,8 @@ static inline int default_apic_id_valid(int apicid) return (apicid < 255); } +extern int default_acpi_madt_oem_check(char *, char *); + extern void default_setup_apic_routing(void); extern struct apic apic_noop; diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 90f97b4b9347..9121abbc8c8f 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -118,9 +118,6 @@ extern int mp_irq_entries; /* MP IRQ source entries */ extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; -/* non-0 if default (table-less) MP configuration */ -extern int mpc_default_type; - /* Older SiS APIC requires we rewrite the index register */ extern int sis_apic_bug; diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index f5a617956735..7bef40a01a1d 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -40,8 +40,6 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES]; extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); extern unsigned int boot_cpu_physical_apicid; -extern unsigned int max_physical_apicid; -extern int mpc_default_type; extern unsigned long mp_lapic_addr; #ifdef CONFIG_X86_LOCAL_APIC @@ -88,15 +86,6 @@ static inline void early_reserve_e820_mpc_new(void) { } #endif int generic_processor_info(int apicid, int version); -#ifdef CONFIG_ACPI -extern void 
mp_register_ioapic(int id, u32 address, u32 gsi_base); -extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, - u32 gsi); -extern void mp_config_acpi_legacy_irqs(void); -struct device; -extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, - int active_high_low); -#endif /* CONFIG_ACPI */ #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC) @@ -163,6 +152,4 @@ extern physid_mask_t phys_cpu_present_map; extern int generic_mps_oem_check(struct mpc_table *, char *, char *); -extern int default_acpi_madt_oem_check(char *, char *); - #endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 86281ffb96d6..b41b47021f53 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -345,6 +345,10 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #endif /*CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC +static void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, + u32 gsi); +static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, + int polarity); static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) @@ -903,7 +907,8 @@ static int __init acpi_parse_madt_lapic_entries(void) #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 -void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) +static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, + u32 gsi) { int ioapic; int pin; @@ -938,7 +943,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) isa_irq_to_gsi[bus_irq] = gsi; } -void __init mp_config_acpi_legacy_irqs(void) +static void __init mp_config_acpi_legacy_irqs(void) { int i; struct mpc_intsrc mp_irq; @@ -1040,7 +1045,8 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, return 0; } -int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) +static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, + int polarity) { int ioapic; int ioapic_pin; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ad28db7e6bde..ca1bd75e3de2 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -67,7 +67,7 @@ EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); /* * The highest APIC ID seen during enumeration. */ -unsigned int max_physical_apicid; +static unsigned int max_physical_apicid; /* * Bitmask of physically existing CPUs: -- cgit v1.2.2 From 8d7cdcb9d8f366c0567b66cce0d5ce37d3311aaf Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:33 +0800 Subject: x86, acpi: Reorganize code to avoid forward declaration in boot.c Reorganize code to avoid forward declaration in boot.c, no function changes. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Link: http://lkml.kernel.org/r/1402302011-23642-5-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 236 ++++++++++++++++++++++---------------------- 1 file changed, 116 insertions(+), 120 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b41b47021f53..ceb3b36f2b2c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -345,10 +345,123 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #endif /*CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC -static void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, - u32 gsi); +#define MP_ISA_BUS 0 + +static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, + u32 gsi) +{ + int ioapic; + int pin; + struct mpc_intsrc mp_irq; + + /* + * Convert 'gsi' to 'ioapic.pin'. + */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return; + pin = mp_find_ioapic_pin(ioapic, gsi); + + /* + * TBD: This check is for faulty timer entries, where the override + * erroneously sets the trigger to level, resulting in a HUGE + * increase of timer interrupts! + */ + if ((bus_irq == 0) && (trigger == 3)) + trigger = 1; + + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; /* IRQ */ + mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */ + mp_irq.dstirq = pin; /* INTIN# */ + + mp_save_irq(&mp_irq); + + isa_irq_to_gsi[bus_irq] = gsi; +} + +static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, + int polarity) +{ +#ifdef CONFIG_X86_MPPARSE + struct mpc_intsrc mp_irq; + struct pci_dev *pdev; + unsigned char number; + unsigned int devfn; + int ioapic; + u8 pin; + + if (!acpi_ioapic) + return 0; + if (!dev || !dev_is_pci(dev)) + return 0; + + pdev = to_pci_dev(dev); + number = pdev->bus->number; + devfn = pdev->devfn; + pin = pdev->pin; + /* print the entry should happen on mptable identically */ + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); + mp_irq.srcbus = number; + mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + ioapic = mp_find_ioapic(gsi); + mp_irq.dstapic = mpc_ioapic_id(ioapic); + mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); + + mp_save_irq(&mp_irq); +#endif + return 0; +} + static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, - int polarity); + int polarity) +{ + int ioapic; + int ioapic_pin; + struct io_apic_irq_attr irq_attr; + int ret; + + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) + return gsi; + + /* Don't set up the ACPI SCI because it's already set up */ + if (acpi_gbl_FADT.sci_interrupt == gsi) + return gsi; + + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) { + printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); + return gsi; + } + + ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); + + if (ioapic_pin > MP_MAX_IOAPIC_PIN) { + printk(KERN_ERR "Invalid reference to IOAPIC pin " + "%d-%d\n", mpc_ioapic_id(ioapic), + ioapic_pin); + return gsi; + } + + if (enable_update_mptable) + mp_config_acpi_gsi(dev, gsi, trigger, polarity); + + set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, + trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, + polarity == ACPI_ACTIVE_HIGH ? 
0 : 1); + ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); + if (ret < 0) + gsi = INT_MIN; + + return gsi; +} + static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) @@ -905,44 +1018,6 @@ static int __init acpi_parse_madt_lapic_entries(void) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC -#define MP_ISA_BUS 0 - -static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, - u32 gsi) -{ - int ioapic; - int pin; - struct mpc_intsrc mp_irq; - - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = mp_find_ioapic_pin(ioapic, gsi); - - /* - * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE - * increase of timer interrupts! - */ - if ((bus_irq == 0) && (trigger == 3)) - trigger = 1; - - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (trigger << 2) | polarity; - mp_irq.srcbus = MP_ISA_BUS; - mp_irq.srcbusirq = bus_irq; /* IRQ */ - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */ - mp_irq.dstirq = pin; /* INTIN# */ - - mp_save_irq(&mp_irq); - - isa_irq_to_gsi[bus_irq] = gsi; -} - static void __init mp_config_acpi_legacy_irqs(void) { int i; @@ -1009,85 +1084,6 @@ static void __init mp_config_acpi_legacy_irqs(void) } } -static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, - int polarity) -{ -#ifdef CONFIG_X86_MPPARSE - struct mpc_intsrc mp_irq; - struct pci_dev *pdev; - unsigned char number; - unsigned int devfn; - int ioapic; - u8 pin; - - if (!acpi_ioapic) - return 0; - if (!dev || !dev_is_pci(dev)) - return 0; - - pdev = to_pci_dev(dev); - number = pdev->bus->number; - devfn = pdev->devfn; - pin = pdev->pin; - /* print the entry should happen on mptable identically */ - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | - (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); - mp_irq.srcbus = number; - mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); - ioapic = mp_find_ioapic(gsi); - mp_irq.dstapic = mpc_ioapic_id(ioapic); - mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); - - mp_save_irq(&mp_irq); -#endif - return 0; -} - -static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, - int polarity) -{ - int ioapic; - int ioapic_pin; - struct io_apic_irq_attr irq_attr; - int ret; - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return gsi; - - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return gsi; - - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) { - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); - return gsi; - } - - ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); - - if (ioapic_pin > MP_MAX_IOAPIC_PIN) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mpc_ioapic_id(ioapic), - ioapic_pin); - return gsi; - } - - if (enable_update_mptable) - mp_config_acpi_gsi(dev, gsi, trigger, polarity); - - set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, - trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 
0 : 1); - ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); - if (ret < 0) - gsi = INT_MIN; - - return gsi; -} - /* * Parse IOAPIC related entries in MADT * returns 0 on success, < 0 on error -- cgit v1.2.2 From 965cd0e4a5e5d704934fa6d476b1a4faa0417e1b Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:34 +0800 Subject: x86, PCI, ACPI: Use kmalloc_node() to optimize for performance Use kmalloc_node() instead of kmalloc() when possible to optimize for performance on NUMA platforms. Signed-off-by: Jiang Liu Acked-by: Bjorn Helgaas Acked-by: David Rientjes Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-6-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/acpi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 5075371ab593..cfd1b132b8e3 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -448,7 +448,7 @@ static void probe_pci_root_info(struct pci_root_info *info, return; size = sizeof(*info->res) * info->res_num; - info->res = kzalloc(size, GFP_KERNEL); + info->res = kzalloc_node(size, GFP_KERNEL, info->sd.node); if (!info->res) { info->res_num = 0; return; @@ -456,7 +456,7 @@ static void probe_pci_root_info(struct pci_root_info *info, size = sizeof(*info->res_offset) * info->res_num; info->res_num = 0; - info->res_offset = kzalloc(size, GFP_KERNEL); + info->res_offset = kzalloc_node(size, GFP_KERNEL, info->sd.node); if (!info->res_offset) { kfree(info->res); info->res = NULL; @@ -499,7 +499,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) if (node != NUMA_NO_NODE && !node_online(node)) node = NUMA_NO_NODE; - info = kzalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc_node(sizeof(*info), GFP_KERNEL, node); if (!info) { printk(KERN_WARNING "pci_bus %04x:%02x: " "ignored (out of memory)\n", domain, busnum); -- cgit v1.2.2 From 032329eebba86350849c027fe3a0b672558893d4 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:35 +0800 Subject: x86, acpi, irq: Kill static function irq_to_gsi() Static function irq_to_gsi() is only called by acpi_isa_irq_to_gsi(), so kill function irq_to_gsi() and simplify acpi_isa_irq_to_gsi(). Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Link: http://lkml.kernel.org/r/1402302011-23642-7-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ceb3b36f2b2c..09682766d56a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -120,22 +120,6 @@ static unsigned int gsi_to_irq(unsigned int gsi) return irq; } -static u32 irq_to_gsi(int irq) -{ - unsigned int gsi; - - if (irq < NR_IRQS_LEGACY) - gsi = isa_irq_to_gsi[irq]; - else if (irq < gsi_top) - gsi = irq; - else if (irq < (gsi_top + NR_IRQS_LEGACY)) - gsi = irq - gsi_top; - else - gsi = 0xffffffff; - - return gsi; -} - /* * This is just a simple wrapper around early_ioremap(), * with sanity checks for phys == 0 and size == 0. @@ -640,10 +624,12 @@ EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) { - if (isa_irq >= 16) - return -1; - *gsi = irq_to_gsi(isa_irq); - return 0; + if (isa_irq < NR_IRQS_LEGACY) { + *gsi = isa_irq_to_gsi[isa_irq]; + return 0; + } + + return -1; } static int acpi_register_gsi_pic(struct device *dev, u32 gsi, -- cgit v1.2.2 From e819813f5cb98030bab38569abffd9354a1c1a82 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:36 +0800 Subject: x86, ACPI, trivial: Minor improvements to arch/x86/kernel/acpi/boot.c 1) Remove out-of-date comment 2) Kill unused function acpi_set_irq_model_pic() 3) Use NR_IRQS_LEGACY instead of hard-coded 16 4) Trivial syntax improvements Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Cc: Jiri Kosina Link: http://lkml.kernel.org/r/1402302011-23642-8-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 50 +++++++++++++++------------------------------ 1 file changed, 16 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 09682766d56a..392360c607dc 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -483,11 +483,6 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; - /* - * mp_config_acpi_legacy_irqs() already setup IRQs < 16 - * If GSI is < 16, this will update its flags, - * else it will create a new mp_irqs[] entry. - */ mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); /* @@ -686,14 +681,7 @@ void acpi_unregister_gsi(u32 gsi) } EXPORT_SYMBOL_GPL(acpi_unregister_gsi); -void __init acpi_set_irq_model_pic(void) -{ - acpi_irq_model = ACPI_IRQ_MODEL_PIC; - __acpi_register_gsi = acpi_register_gsi_pic; - acpi_ioapic = 0; -} - -void __init acpi_set_irq_model_ioapic(void) +static void __init acpi_set_irq_model_ioapic(void) { acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; __acpi_register_gsi = acpi_register_gsi_ioapic; @@ -932,9 +920,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). 
*/ - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, - acpi_parse_lapic_addr_ovr, 0); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, + acpi_parse_lapic_addr_ovr, 0); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); @@ -959,9 +946,8 @@ static int __init acpi_parse_madt_lapic_entries(void) * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). */ - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, - acpi_parse_lapic_addr_ovr, 0); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, + acpi_parse_lapic_addr_ovr, 0); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); @@ -989,11 +975,10 @@ static int __init acpi_parse_madt_lapic_entries(void) return count; } - x2count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI, - acpi_parse_x2apic_nmi, 0); - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0); + x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI, + acpi_parse_x2apic_nmi, 0); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, + acpi_parse_lapic_nmi, 0); if (count < 0 || x2count < 0) { printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); /* TBD: Cleanup to allow fallback to MPS */ @@ -1022,7 +1007,7 @@ static void __init mp_config_acpi_legacy_irqs(void) * Use the default configuration for the IRQs 0-15. Unless * overridden by (MADT) interrupt source override entries. */ - for (i = 0; i < 16; i++) { + for (i = 0; i < NR_IRQS_LEGACY; i++) { int ioapic, pin; unsigned int dstapic; int idx; @@ -1099,9 +1084,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) return -ENODEV; } - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic, - MAX_IO_APICS); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic, + MAX_IO_APICS); if (!count) { printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); return -ENODEV; @@ -1110,9 +1094,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) return count; } - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, - nr_irqs); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, + acpi_parse_int_src_ovr, nr_irqs); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); @@ -1131,9 +1114,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) /* Fill in identity legacy mappings where no override */ mp_config_acpi_legacy_irqs(); - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, - nr_irqs); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, + acpi_parse_nmi_src, nr_irqs); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); /* TBD: Cleanup to allow fallback to MPS */ -- cgit v1.2.2 From 2c0a6894df19515baf9d2bf0076a2b57c8b51efb Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 10 Jun 2014 14:11:23 +0800 Subject: x86, ACPI, irq: Enhance error handling in function acpi_register_gsi() Function mp_register_gsi() may return error code when failed to look up or program corresponding IOAPIC pin for GSI, so enhance acpi_register_gsi() to handle possible error cases. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Link: http://lkml.kernel.org/r/1402380683-32345-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 392360c607dc..298f79616f1c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -97,6 +97,8 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; +#define ACPI_INVALID_GSI INT_MIN + static unsigned int gsi_to_irq(unsigned int gsi) { unsigned int irq = gsi + NR_IRQS_LEGACY; @@ -441,7 +443,7 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, polarity == ACPI_ACTIVE_HIGH ? 0 : 1); ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); if (ret < 0) - gsi = INT_MIN; + gsi = ACPI_INVALID_GSI; return gsi; } @@ -666,13 +668,13 @@ int (*acpi_suspend_lowlevel)(void); */ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) { - unsigned int irq; - unsigned int plat_gsi = gsi; + unsigned int plat_gsi; - plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity); - irq = gsi_to_irq(plat_gsi); + plat_gsi = __acpi_register_gsi(dev, gsi, trigger, polarity); + if (plat_gsi != ACPI_INVALID_GSI) + return gsi_to_irq(plat_gsi); - return irq; + return -1; } EXPORT_SYMBOL_GPL(acpi_register_gsi); -- cgit v1.2.2 From 2e0ad0e2c13534f1a0e7f63661e666c281e09b66 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:38 +0800 Subject: x86, ACPI, irq: Fix possible error in GSI to IRQ mapping for legacy IRQ A default identity mapping between GSI and IRQ is built for legacy IRQs. So when overriding the default identity mapping for legacy IRQs, we should also invalidate isa_irq_to_gsi[gsi] when setting isa_irq_to_gsi[irq] = gsi. Otherwise there may be two entries with the same GSI in the isa_irq_to_gsi array, and acpi_isa_irq_to_gsi() may give wrong result. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Link: http://lkml.kernel.org/r/1402302011-23642-10-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 298f79616f1c..f3bafcd32b98 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -366,6 +366,13 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, mp_save_irq(&mp_irq); + /* + * Reset default identity mapping if gsi is also an legacy IRQ, + * otherwise there will be more than one entry with the same GSI + * and acpi_isa_irq_to_gsi() may give wrong result.
+ */ + if (gsi < NR_IRQS_LEGACY && isa_irq_to_gsi[gsi] == gsi) + isa_irq_to_gsi[gsi] = ACPI_INVALID_GSI; isa_irq_to_gsi[bus_irq] = gsi; } @@ -621,7 +628,8 @@ EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) { - if (isa_irq < NR_IRQS_LEGACY) { + if (isa_irq < NR_IRQS_LEGACY && + isa_irq_to_gsi[isa_irq] != ACPI_INVALID_GSI) { *gsi = isa_irq_to_gsi[isa_irq]; return 0; } -- cgit v1.2.2 From 3eb2be5f49fdeac5ea2880aec90008f0a8250029 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:39 +0800 Subject: x86, irq, trivial: Minor improvements of IRQ related code 1) Kill unused MAX_HARDIRQS_PER_CPU. 2) Improve function prototype declarations. 3) Simple typo fix, change "gsit" to "gsi". 4) Use macro VECTOR_UNDEFINED instead of hard-coded -1. 5) Kill redundant comments. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Jiri Kosina Link: http://lkml.kernel.org/r/1402302011-23642-11-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hardirq.h | 3 --- arch/x86/include/asm/io_apic.h | 16 +++++++--------- arch/x86/kernel/apic/io_apic.c | 21 +++++---------------- 3 files changed, 12 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 230853da4ec0..0f5fb6b6567e 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -40,9 +40,6 @@ typedef struct { DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ -#define MAX_HARDIRQS_PER_CPU NR_VECTORS - #define __ARCH_IRQ_STAT #define inc_irq_stat(member) this_cpu_inc(irq_stat.member) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 9121abbc8c8f..8dd1e13f2989 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -144,12 +144,9 @@ struct io_apic_irq_attr; struct irq_cfg; extern int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr); -void setup_IO_APIC_irq_extra(u32 gsi); +extern void setup_IO_APIC_irq_extra(u32 gsi); extern void ioapic_insert_resources(void); -extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, - unsigned int, int, - struct io_apic_irq_attr *); extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, unsigned int, int, struct io_apic_irq_attr *); @@ -159,7 +156,8 @@ extern void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id); extern void native_eoi_ioapic_pin(int apic, int pin, int vector); -int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); +extern int io_apic_setup_irq_pin_once(unsigned int irq, int node, + struct io_apic_irq_attr *attr); extern int save_ioapic_entries(void); extern void mask_ioapic_entries(void); @@ -172,11 +170,11 @@ struct mp_ioapic_gsi{ u32 gsi_base; u32 gsi_end; }; -extern struct mp_ioapic_gsi mp_gsi_routing[]; extern u32 gsi_top; -int mp_find_ioapic(u32 gsi); -int mp_find_ioapic_pin(int ioapic, u32 gsi); -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); + +extern int mp_find_ioapic(u32 gsi); +extern int mp_find_ioapic_pin(int ioapic, u32 gsi); +extern void __init mp_register_ioapic(int id, u32
address, u32 gsi_base); extern void __init pre_init_apic_IRQ0(void); extern void mp_save_irq(struct mpc_intsrc *m); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 81e08eff05ee..f3390424d86e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1010,7 +1010,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].irqtype && + mp_irqs[i].irqtype == mp_INT && (bus == lbus) && (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq); @@ -1359,7 +1359,7 @@ static void __init __io_apic_setup_irqs(unsigned int ioapic_idx) irq = pin_2_irq(idx, ioapic_idx, pin); - if ((ioapic_idx > 0) && (irq > 16)) + if ((ioapic_idx > 0) && (irq > NR_IRQS_LEGACY)) continue; /* @@ -1388,7 +1388,7 @@ static void __init setup_IO_APIC_irqs(void) } /* - * for the gsit that is not in first ioapic + * for the gsi that is not in first ioapic * but could not use acpi_register_gsi() * like some special sci in IBM x3330 */ @@ -2225,7 +2225,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); goto unlock; } - __this_cpu_write(vector_irq[vector], -1); + __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); unlock: raw_spin_unlock(&desc->lock); } @@ -2514,17 +2514,6 @@ static inline void init_IO_APIC_traps(void) struct irq_cfg *cfg; unsigned int irq; - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ for_each_active_irq(irq) { cfg = irq_get_chip_data(irq); if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { @@ -3550,7 +3539,7 @@ void __init setup_ioapic_dest(void) continue; irq = pin_2_irq(irq_entry, ioapic, pin); - if ((ioapic > 0) && (irq > 16)) + if ((ioapic > 0) && (irq > NR_IRQS_LEGACY)) continue; idata = irq_get_irq_data(irq); -- cgit v1.2.2 From 4035ed0134b2dc545a0b22e3c052f684786649d4 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:40 +0800 Subject: x86, ioapic: Kill unused global variable timer_through_8259 Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-12-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 4 ---- arch/x86/kernel/apic/io_apic.c | 3 --- 2 files changed, 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 8dd1e13f2989..de3d8b04cf64 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -130,9 +130,6 @@ extern int noioapicquirk; /* -1 if "noapic" boot option passed */ extern int noioapicreroute; -/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ -extern int timer_through_8259; - /* * If we use the IO-APIC for IRQ routing, disable automatic * assignment of PCI IRQ's. 
@@ -212,7 +209,6 @@ extern void io_apic_eoi(unsigned int apic, unsigned int vector); #define io_apic_assign_pci_irqs 0 #define setup_ioapic_ids_from_mpc x86_init_noop -static const int timer_through_8259 = 0; static inline void ioapic_insert_resources(void) { } #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f3390424d86e..18ab95b6f1db 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2638,8 +2638,6 @@ static int __init disable_timer_pin_setup(char *arg) } early_param("disable_timer_pin_1", disable_timer_pin_setup); -int timer_through_8259 __initdata; - /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -2744,7 +2742,6 @@ static inline void __init check_timer(void) legacy_pic->unmask(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); - timer_through_8259 = 1; goto out; } /* -- cgit v1.2.2 From 518b2c63fcdde0723d2719bbe5b14086bdc8ec80 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:41 +0800 Subject: x86, ioapic: Kill static variable nr_irqs_gsi Static variable nr_irqs_gsi is used to maintain the lowest dynamic allocatable IRQ number. It may cause trouble when enabling dynamic IRQ allocation for IOAPIC, so use arch_dynirq_lower_bound() to avoid directly accessing nr_irqs_gsi and kill nr_irqs_gsi. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-13-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 18ab95b6f1db..46d09eac1777 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -118,9 +118,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; -/* GSI interrupts */ -static int nr_irqs_gsi = NR_IRQS_LEGACY; - #ifdef CONFIG_EISA int mp_bus_id_to_type[MAX_MP_BUSSES]; #endif @@ -3326,20 +3323,11 @@ static int __init io_apic_get_redir_entries(int ioapic) return reg_01.bits.entries + 1; } -static void __init probe_nr_irqs_gsi(void) -{ - int nr; - - nr = gsi_top + NR_IRQS_LEGACY; - if (nr > nr_irqs_gsi) - nr_irqs_gsi = nr; - - printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); -} - unsigned int arch_dynirq_lower_bound(unsigned int from) { - return from < nr_irqs_gsi ? nr_irqs_gsi : from; + unsigned int min = gsi_top + NR_IRQS_LEGACY; + + return from < min ? 
min : from; } int __init arch_probe_nr_irqs(void) @@ -3349,12 +3337,12 @@ int __init arch_probe_nr_irqs(void) if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) nr_irqs = NR_VECTORS * nr_cpu_ids; - nr = nr_irqs_gsi + 8 * nr_cpu_ids; + nr = (gsi_top + NR_IRQS_LEGACY) + 8 * nr_cpu_ids; #if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) /* * for MSI and HT dyn irq */ - nr += nr_irqs_gsi * 16; + nr += (gsi_top + NR_IRQS_LEGACY) * 16; #endif if (nr < nr_irqs) nr_irqs = nr; @@ -3627,8 +3615,6 @@ fake_ioapic_page: ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } - - probe_nr_irqs_gsi(); } void __init ioapic_insert_resources(void) -- cgit v1.2.2 From f44d16929638a6dc34bdd51e7422e7e3c1d0b904 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:42 +0800 Subject: x86, ioapic: Introduce helper utilities to walk ioapics and pins Introduce helper utilities for_each_ioapic(), for_each_ioapic_reverse(), for_each_pin() and for_each_ioapic_pin() to walk ioapics and pins. They will be rewritten later to support IOAPIC hotplug. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-14-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 120 +++++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 58 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 46d09eac1777..16b0247392a5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -62,6 +62,16 @@ #define __apicdebuginit(type) static type __init +#define for_each_ioapic(idx) \ + for ((idx) = 0; (idx) < nr_ioapics; (idx)++) +#define for_each_ioapic_reverse(idx) \ + for ((idx) = nr_ioapics - 1; (idx) >= 0; (idx)--) +#define for_each_pin(idx, pin) \ + for ((pin) = 0; (pin) < ioapics[(idx)].nr_registers; (pin)++) +#define for_each_ioapic_pin(idx, pin) \ + for_each_ioapic((idx)) \ + for_each_pin((idx), (pin)) + #define for_each_irq_pin(entry, head) \ for (entry = head; entry; entry = entry->next) @@ -191,7 +201,7 @@ int __init arch_early_irq_init(void) if (!legacy_pic->nr_legacy_irqs) io_apic_irqs = ~0UL; - for (i = 0; i < nr_ioapics; i++) { + for_each_ioapic(i) { ioapics[i].saved_registers = kzalloc(sizeof(struct IO_APIC_route_entry) * ioapics[i].nr_registers, GFP_KERNEL); @@ -624,9 +634,8 @@ static void clear_IO_APIC (void) { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) - clear_IO_APIC_pin(apic, pin); + for_each_ioapic_pin(apic, pin) + clear_IO_APIC_pin(apic, pin); } #ifdef CONFIG_X86_32 @@ -675,13 +684,13 @@ int save_ioapic_entries(void) int apic, pin; int err = 0; - for (apic = 0; apic < nr_ioapics; apic++) { + for_each_ioapic(apic) { if (!ioapics[apic].saved_registers) { err = -ENOMEM; continue; } - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) + for_each_pin(apic, pin) ioapics[apic].saved_registers[pin] = ioapic_read_entry(apic, pin); } @@ -696,11 +705,11 @@ void mask_ioapic_entries(void) { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) { + for_each_ioapic(apic) { if (!ioapics[apic].saved_registers) continue; - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { + for_each_pin(apic, pin) { struct IO_APIC_route_entry
entry; entry = ioapics[apic].saved_registers[pin]; @@ -719,11 +728,11 @@ int restore_ioapic_entries(void) { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) { + for_each_ioapic(apic) { if (!ioapics[apic].saved_registers) continue; - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) + for_each_pin(apic, pin) ioapic_write_entry(apic, pin, ioapics[apic].saved_registers[pin]); } @@ -782,7 +791,7 @@ static int __init find_isa_irq_apic(int irq, int type) if (i < mp_irq_entries) { int ioapic_idx; - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + for_each_ioapic(ioapic_idx) if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic) return ioapic_idx; } @@ -1001,7 +1010,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, for (i = 0; i < mp_irq_entries; i++) { int lbus = mp_irqs[i].srcbus; - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + for_each_ioapic(ioapic_idx) if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic || mp_irqs[i].dstapic == MP_APIC_ALL) break; @@ -1224,12 +1233,10 @@ static inline int IO_APIC_irq_trigger(int irq) { int apic, idx, pin; - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } + for_each_ioapic_pin(apic, pin) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); } /* * nonexistent IRQs are edge default @@ -1349,7 +1356,7 @@ static void __init __io_apic_setup_irqs(unsigned int ioapic_idx) struct io_apic_irq_attr attr; unsigned int pin, irq; - for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) { + for_each_pin(ioapic_idx, pin) { idx = find_irq_entry(ioapic_idx, pin, mp_INT); if (io_apic_pin_not_connected(idx, ioapic_idx, pin)) continue; @@ -1380,7 +1387,7 @@ static void __init setup_IO_APIC_irqs(void) apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + for_each_ioapic(ioapic_idx) __io_apic_setup_irqs(ioapic_idx); } @@ -1583,7 +1590,7 @@ __apicdebuginit(void) print_IO_APICs(void) struct irq_chip *chip; printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + for_each_ioapic(ioapic_idx) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", mpc_ioapic_id(ioapic_idx), ioapics[ioapic_idx].nr_registers); @@ -1594,7 +1601,7 @@ __apicdebuginit(void) print_IO_APICs(void) */ printk(KERN_INFO "testing the IO APIC.......................\n"); - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + for_each_ioapic(ioapic_idx) print_IO_APIC(ioapic_idx); printk(KERN_DEBUG "IRQ to pin mappings:\n"); @@ -1825,26 +1832,22 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; void __init enable_IO_APIC(void) { int i8259_apic, i8259_pin; - int apic; + int apic, pin; if (!legacy_pic->nr_legacy_irqs) return; - for(apic = 0; apic < nr_ioapics; apic++) { - int pin; + for_each_ioapic_pin(apic, pin) { /* See if any of the pins is in ExtINT mode */ - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { - struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, pin); + struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin); - /* If the interrupt line is enabled and in ExtInt mode - * I have found the pin where the i8259 is connected. 
- */ - if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { - ioapic_i8259.apic = apic; - ioapic_i8259.pin = pin; - goto found_i8259; - } + /* If the interrupt line is enabled and in ExtInt mode + * I have found the pin where the i8259 is connected. + */ + if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { + ioapic_i8259.apic = apic; + ioapic_i8259.pin = pin; + goto found_i8259; } } found_i8259: @@ -1947,7 +1950,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) /* * Set the IOAPIC ID to the value stored in the MPC table. */ - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { + for_each_ioapic(ioapic_idx) { /* Read the register 0 value */ raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic_idx, 0); @@ -2863,7 +2866,7 @@ static void ioapic_resume(void) { int ioapic_idx; - for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--) + for_each_ioapic_reverse(ioapic_idx) resume_ioapic_id(ioapic_idx); restore_ioapic_entries(); @@ -3457,9 +3460,8 @@ static u8 __init io_apic_unique_id(u8 id) DECLARE_BITMAP(used, 256); bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { + for_each_ioapic(i) __set_bit(mpc_ioapic_id(i), used); - } if (!test_bit(id, used)) return id; return find_first_zero_bit(used, 256); @@ -3517,8 +3519,7 @@ void __init setup_ioapic_dest(void) if (skip_ioapic_setup == 1) return; - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) - for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) { + for_each_ioapic_pin(ioapic, pin) { irq_entry = find_irq_entry(ioapic, pin, mp_INT); if (irq_entry == -1) continue; @@ -3547,29 +3548,33 @@ void __init setup_ioapic_dest(void) static struct resource *ioapic_resources; -static struct resource * __init ioapic_setup_resources(int nr_ioapics) +static struct resource * __init ioapic_setup_resources(void) { unsigned long n; struct resource *res; char *mem; - int i; + int i, num = 0; - if (nr_ioapics <= 0) + for_each_ioapic(i) + num++; + if (num == 0) return NULL; n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= nr_ioapics; + n *= num; mem = alloc_bootmem(n); res = (void *)mem; - mem += sizeof(struct resource) * nr_ioapics; + mem += sizeof(struct resource) * num; - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + num = 0; + for_each_ioapic(i) { + res[num].name = mem; + res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY; snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; + num++; } ioapic_resources = res; @@ -3583,8 +3588,8 @@ void __init native_io_apic_init_mappings(void) struct resource *ioapic_res; int i; - ioapic_res = ioapic_setup_resources(nr_ioapics); - for (i = 0; i < nr_ioapics; i++) { + ioapic_res = ioapic_setup_resources(); + for_each_ioapic(i) { if (smp_found_config) { ioapic_phys = mpc_ioapic_addr(i); #ifdef CONFIG_X86_32 @@ -3629,7 +3634,7 @@ void __init ioapic_insert_resources(void) return; } - for (i = 0; i < nr_ioapics; i++) { + for_each_ioapic(i) { insert_resource(&iomem_resource, r); r++; } @@ -3637,16 +3642,15 @@ void __init ioapic_insert_resources(void) int mp_find_ioapic(u32 gsi) { - int i = 0; + int i; if (nr_ioapics == 0) return -1; /* Find the IOAPIC that manages this GSI. 
*/ - for (i = 0; i < nr_ioapics; i++) { + for_each_ioapic(i) { struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i); - if ((gsi >= gsi_cfg->gsi_base) - && (gsi <= gsi_cfg->gsi_end)) + if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end) return i; } @@ -3658,7 +3662,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi) { struct mp_ioapic_gsi *gsi_cfg; - if (WARN_ON(ioapic == -1)) + if (WARN_ON(ioapic < 0)) return -1; gsi_cfg = mp_ioapic_gsi_routing(ioapic); -- cgit v1.2.2 From 32f5ef5d8dd5ac3cc7ac12cdaf33023e2fbd33c1 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:43 +0800 Subject: x86, ioapic: Use irq_cfg() instead of irq_get_chip_data() for better readability Use defined helper function irq_cfg() instead of irq_get_chip_data() for better readability. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-15-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 16b0247392a5..446a931d5f44 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -230,7 +230,7 @@ int __init arch_early_irq_init(void) return 0; } -static struct irq_cfg *irq_cfg(unsigned int irq) +static inline struct irq_cfg *irq_cfg(unsigned int irq) { return irq_get_chip_data(irq); } @@ -272,7 +272,7 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) if (res < 0) { if (res != -EEXIST) return NULL; - cfg = irq_get_chip_data(at); + cfg = irq_cfg(at); if (cfg) return cfg; } @@ -1204,7 +1204,7 @@ void __setup_vector_irq(int cpu) raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_active_irq(irq) { - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) continue; @@ -1612,7 +1612,7 @@ __apicdebuginit(void) print_IO_APICs(void) if (chip != &ioapic_chip) continue; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) continue; entry = cfg->irq_2_pin; @@ -2253,7 +2253,7 @@ static void irq_complete_move(struct irq_cfg *cfg) void irq_force_complete_move(int irq) { - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); if (!cfg) return; @@ -2515,7 +2515,7 @@ static inline void init_IO_APIC_traps(void) unsigned int irq; for_each_active_irq(irq) { - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* * Hmm.. 
We don't have an entry for this, @@ -2648,7 +2648,7 @@ early_param("disable_timer_pin_1", disable_timer_pin_setup); */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_get_chip_data(0); + struct irq_cfg *cfg = irq_cfg(0); int node = cpu_to_node(0); int apic1, pin1, apic2, pin2; unsigned long flags; @@ -2912,7 +2912,7 @@ int arch_setup_hwirq(unsigned int irq, int node) void arch_teardown_hwirq(unsigned int irq) { - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; free_remapped_irq(irq); @@ -3039,7 +3039,7 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, if (!irq_offset) write_msi_msg(irq, &msg); - setup_remapped_irq(irq, irq_get_chip_data(irq), chip); + setup_remapped_irq(irq, irq_cfg(irq), chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); @@ -3178,7 +3178,7 @@ int default_setup_hpet_msi(unsigned int irq, unsigned int id) hpet_msi_write(irq_get_handler_data(irq), &msg); irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - setup_remapped_irq(irq, irq_get_chip_data(irq), chip); + setup_remapped_irq(irq, irq_cfg(irq), chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); return 0; -- cgit v1.2.2 From 79598505aee61bc943955de3653be054c2f7393f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:44 +0800 Subject: x86, irq: Reorganize IO_APIC_get_PCI_irq_vector() to prepare for irqdomain Reorganize function IO_APIC_get_PCI_irq_vector() a bit to better support coming irqdomain. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-16-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 61 +++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 446a931d5f44..09e5c7b4df5a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -997,7 +997,7 @@ static int pin_2_irq(int idx, int apic, int pin) int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, struct io_apic_irq_attr *irq_attr) { - int ioapic_idx, i, best_guess = -1; + int irq, i, best_guess = -1; apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", @@ -1007,41 +1007,46 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } + for (i = 0; i < mp_irq_entries; i++) { int lbus = mp_irqs[i].srcbus; + int ioapic_idx, found = 0; + + if (bus != lbus || mp_irqs[i].irqtype != mp_INT || + slot != ((mp_irqs[i].srcbusirq >> 2) & 0x1f)) + continue; for_each_ioapic(ioapic_idx) if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic || - mp_irqs[i].dstapic == MP_APIC_ALL) + mp_irqs[i].dstapic == MP_APIC_ALL) { + found = 1; break; + } + if (!found) + continue; - if (!test_bit(lbus, mp_bus_not_pci) && - mp_irqs[i].irqtype == mp_INT && - (bus == lbus) && - (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq); - - if (!(ioapic_idx || IO_APIC_IRQ(irq))) - continue; + /* Skip ISA IRQs */ + irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq); + if (ioapic_idx == 0 && !IO_APIC_IRQ(irq)) + continue; - if (pin == 
(mp_irqs[i].srcbusirq & 3)) { - set_io_apic_irq_attr(irq_attr, ioapic_idx, - mp_irqs[i].dstirq, - irq_trigger(i), - irq_polarity(i)); - return irq; - } - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) { - set_io_apic_irq_attr(irq_attr, ioapic_idx, - mp_irqs[i].dstirq, - irq_trigger(i), - irq_polarity(i)); - best_guess = irq; - } + if (pin == (mp_irqs[i].srcbusirq & 3)) { + set_io_apic_irq_attr(irq_attr, ioapic_idx, + mp_irqs[i].dstirq, + irq_trigger(i), + irq_polarity(i)); + return irq; + } + /* + * Use the first all-but-pin matching entry as a + * best-guess fuzzy result for broken mptables. + */ + if (best_guess < 0) { + set_io_apic_irq_attr(irq_attr, ioapic_idx, + mp_irqs[i].dstirq, + irq_trigger(i), + irq_polarity(i)); + best_guess = irq; } } return best_guess; -- cgit v1.2.2 From 18e485518656205bbffce5e01f07830a6c3f557d Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:45 +0800 Subject: x86, irq: Introduce some helper utilities to improve readability It also fixes an off by one bug in if ((ioapic_idx > 0) && (irq > NR_IRQS_LEGACY)) It should be if ((ioapic_idx > 0) && (irq >= NR_IRQS_LEGACY)) Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-17-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 2 ++ arch/x86/kernel/apic/io_apic.c | 29 ++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index de3d8b04cf64..b775cf3622c3 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -171,6 +171,7 @@ extern u32 gsi_top; extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); +extern u32 mp_pin_to_gsi(int ioapic, int pin); extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); extern void __init pre_init_apic_IRQ0(void); @@ -212,6 +213,7 @@ extern void io_apic_eoi(unsigned int apic, unsigned int vector); static inline void ioapic_insert_resources(void) { } #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } +static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } struct io_apic_irq_attr; static inline int io_apic_set_pci_routing(struct device *dev, int irq, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 09e5c7b4df5a..424f795a1c78 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -117,6 +117,24 @@ struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) return &ioapics[ioapic_idx].gsi_config; } +static inline int mp_ioapic_pin_count(int ioapic) +{ + struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); + + return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; +} + +u32 mp_pin_to_gsi(int ioapic, int pin) +{ + return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin; +} + +/* Initialize all legacy IRQs and all pins on the first IOAPIC at boot */ +static inline int mp_init_irq_at_boot(int ioapic, int irq) +{ + return ioapic == 0 || (irq >= 0 && irq < NR_IRQS_LEGACY); +} + int nr_ioapics; /* The one past the highest gsi number used */ @@ -1367,8 +1385,7 @@ static void __init 
__io_apic_setup_irqs(unsigned int ioapic_idx) continue; irq = pin_2_irq(idx, ioapic_idx, pin); - - if ((ioapic_idx > 0) && (irq > NR_IRQS_LEGACY)) + if (!mp_init_irq_at_boot(ioapic_idx, irq)) continue; /* @@ -1419,9 +1436,7 @@ void setup_IO_APIC_irq_extra(u32 gsi) return; irq = pin_2_irq(idx, ioapic_idx, pin); - - /* Only handle the non legacy irqs on secondary ioapics */ - if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY) + if (mp_init_irq_at_boot(ioapic_idx, irq)) return; set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), @@ -3528,9 +3543,9 @@ void __init setup_ioapic_dest(void) irq_entry = find_irq_entry(ioapic, pin, mp_INT); if (irq_entry == -1) continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - if ((ioapic > 0) && (irq > NR_IRQS_LEGACY)) + irq = pin_2_irq(irq_entry, ioapic, pin); + if (!mp_init_irq_at_boot(ioapic, irq)) continue; idata = irq_get_irq_data(irq); -- cgit v1.2.2 From 6084a6e23c971ef703229ee1aec68d01688578d6 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:46 +0800 Subject: x86: ce4100, irq: Make CE4100 depend on CONFIG_X86_IO_APIC Intel CE4100 platforms need IOAPIC support because some devices are always connected to the second IOAPIC, so make CONFIG_CE depend on CONFIG_X86_IO_APIC. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-18-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 1 + arch/x86/platform/ce4100/ce4100.c | 9 ++------- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a8f749ef0fdc..9df5c453171e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -429,6 +429,7 @@ config X86_INTEL_CE bool "CE4100 TV platform" depends on PCI depends on PCI_GODIRECT + depends on X86_IO_APIC depends on X86_32 depends on X86_EXTENDED_PLATFORM select X86_REBOOTFIXUPS diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index 8244f5ec2f4c..0a54a586559b 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -135,14 +135,12 @@ static void __init sdv_arch_setup(void) sdv_serial_fixup(); } -#ifdef CONFIG_X86_IO_APIC static void sdv_pci_init(void) { x86_of_pci_init(); /* We can't set this earlier, because we need to calibrate the timer */ legacy_pic = &null_legacy_pic; } -#endif /* * CE4100 specific x86_init function overrides and early setup @@ -155,7 +153,9 @@ void __init x86_ce4100_early_setup(void) x86_init.resources.probe_roms = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck; x86_init.pci.init = ce4100_pci_init; + x86_init.pci.init_irq = sdv_pci_init; /* * By default, the reboot method is ACPI which is supported by the * @@ -166,10 +166,5 @@ void __init x86_ce4100_early_setup(void) */ reboot_type = BOOT_KBD; -#ifdef CONFIG_X86_IO_APIC - x86_init.pci.init_irq = sdv_pci_init; - x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck; -#endif - pm_power_off = ce4100_power_off; } -- cgit v1.2.2 From 6532ce994c304835f3bfc8479acce9d102cd8b5b Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:47 +0800 Subject: x86: ce4100, irq: Do not set legacy_pic to
null_legacy_pic Intel CE4100 platforms has i8259 legacy interrupt controllers, so don't set legacy_pic to null_legacy_pic in late booting stage because we need legacy_pic to mask i8259 pins when enabling IOAPIC pins for safety. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-19-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/ce4100/ce4100.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index 0a54a586559b..701fd5843c87 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -138,8 +138,6 @@ static void __init sdv_arch_setup(void) static void sdv_pci_init(void) { x86_of_pci_init(); - /* We can't set this earlier, because we need to calibrate the timer */ - legacy_pic = &null_legacy_pic; } /* -- cgit v1.2.2 From 95d76acc7518d566df18d67c1343bb375b78d1f3 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:48 +0800 Subject: x86, irq: Count legacy IRQs by legacy_pic->nr_legacy_irqs instead of NR_IRQS_LEGACY Some platforms, such as Intel MID and mshypv, do not support legacy interrupt controllers. So count legacy IRQs by legacy_pic->nr_legacy_irqs instead of hard-coded NR_IRQS_LEGACY. Signed-off-by: Jiang Liu Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: xen-devel@lists.xenproject.org Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Cc: Konrad Rzeszutek Wilk Cc: Rob Herring Cc: Michal Simek Cc: Tony Lindgren Acked-by: David Vrabel Link: http://lkml.kernel.org/r/1402302011-23642-20-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/i8259.h | 5 +++++ arch/x86/kernel/acpi/boot.c | 13 +++++++------ arch/x86/kernel/apic/io_apic.c | 43 ++++++++++++++++++++++++------------------ arch/x86/kernel/devicetree.c | 13 +++++++------ arch/x86/kernel/irqinit.c | 6 +++--- arch/x86/pci/xen.c | 7 ++++--- 6 files changed, 51 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index a20365953bf8..ccffa53750a8 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -67,4 +67,9 @@ struct legacy_pic { extern struct legacy_pic *legacy_pic; extern struct legacy_pic null_legacy_pic; +static inline int nr_legacy_irqs(void) +{ + return legacy_pic->nr_legacy_irqs; +} + #endif /* _ASM_X86_I8259_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f3bafcd32b98..b12976590a72 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "sleep.h" /* To include x86_acpi_suspend_lowlevel */ static int __initdata acpi_force = 0; @@ -101,10 +102,10 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { static unsigned int gsi_to_irq(unsigned int gsi) { - unsigned int irq = gsi + NR_IRQS_LEGACY; + unsigned int irq = gsi + nr_legacy_irqs(); unsigned int i; - for (i = 0; i < NR_IRQS_LEGACY; i++) { + for (i = 0; i < nr_legacy_irqs(); i++) { if (isa_irq_to_gsi[i] == gsi) { return i; } @@ -114,7 +115,7 @@ static unsigned int 
gsi_to_irq(unsigned int gsi) * except on truly weird platforms that have * non isa irqs in the first 16 gsis. */ - if (gsi >= NR_IRQS_LEGACY) + if (gsi >= nr_legacy_irqs()) irq = gsi; else irq = gsi_top + gsi; @@ -371,7 +372,7 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, * otherwise there will be more than one entry with the same GSI * and acpi_isa_irq_to_gsi() may give wrong result. */ - if (gsi < NR_IRQS_LEGACY && isa_irq_to_gsi[gsi] == gsi) + if (gsi < nr_legacy_irqs() && isa_irq_to_gsi[gsi] == gsi) isa_irq_to_gsi[gsi] = ACPI_INVALID_GSI; isa_irq_to_gsi[bus_irq] = gsi; } @@ -628,7 +629,7 @@ EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) { - if (isa_irq < NR_IRQS_LEGACY && + if (isa_irq < nr_legacy_irqs() && isa_irq_to_gsi[isa_irq] != ACPI_INVALID_GSI) { *gsi = isa_irq_to_gsi[isa_irq]; return 0; @@ -1017,7 +1018,7 @@ static void __init mp_config_acpi_legacy_irqs(void) * Use the default configuration for the IRQs 0-15. Unless * overridden by (MADT) interrupt source override entries. */ - for (i = 0; i < NR_IRQS_LEGACY; i++) { + for (i = 0; i < nr_legacy_irqs(); i++) { int ioapic, pin; unsigned int dstapic; int idx; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 424f795a1c78..4208ea9edc10 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -129,10 +129,17 @@ u32 mp_pin_to_gsi(int ioapic, int pin) return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin; } -/* Initialize all legacy IRQs and all pins on the first IOAPIC at boot */ +/* + * Initialize all legacy IRQs and all pins on the first IOAPIC + * if we have legacy interrupt controller. Kernel boot option "pirq=" + * may rely on non-legacy pins on the first IOAPIC. + */ static inline int mp_init_irq_at_boot(int ioapic, int irq) { - return ioapic == 0 || (irq >= 0 && irq < NR_IRQS_LEGACY); + if (!nr_legacy_irqs()) + return 0; + + return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs()); } int nr_ioapics; @@ -216,7 +223,7 @@ int __init arch_early_irq_init(void) struct irq_cfg *cfg; int count, node, i; - if (!legacy_pic->nr_legacy_irqs) + if (!nr_legacy_irqs()) io_apic_irqs = ~0UL; for_each_ioapic(i) { @@ -239,7 +246,7 @@ int __init arch_early_irq_init(void) * For legacy IRQ's, start with assigning irq0 to irq15 to * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. 
*/ - if (i < legacy_pic->nr_legacy_irqs) { + if (i < nr_legacy_irqs()) { cfg[i].vector = IRQ0_VECTOR + i; cpumask_setall(cfg[i].domain); } @@ -823,7 +830,7 @@ static int __init find_isa_irq_apic(int irq, int type) */ static int EISA_ELCR(unsigned int irq) { - if (irq < legacy_pic->nr_legacy_irqs) { + if (irq < nr_legacy_irqs()) { unsigned int port = 0x4d0 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } @@ -980,7 +987,7 @@ static int pin_2_irq(int idx, int apic, int pin) } else { u32 gsi = gsi_cfg->gsi_base + pin; - if (gsi >= NR_IRQS_LEGACY) + if (gsi >= nr_legacy_irqs()) irq = gsi; else irq = gsi_top + gsi; @@ -1357,7 +1364,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, } ioapic_register_intr(irq, cfg, attr->trigger); - if (irq < legacy_pic->nr_legacy_irqs) + if (irq < nr_legacy_irqs()) legacy_pic->mask(irq); ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry); @@ -1782,7 +1789,7 @@ __apicdebuginit(void) print_PIC(void) unsigned int v; unsigned long flags; - if (!legacy_pic->nr_legacy_irqs) + if (!nr_legacy_irqs()) return; printk(KERN_DEBUG "\nprinting PIC contents\n"); @@ -1854,7 +1861,7 @@ void __init enable_IO_APIC(void) int i8259_apic, i8259_pin; int apic, pin; - if (!legacy_pic->nr_legacy_irqs) + if (!nr_legacy_irqs()) return; for_each_ioapic_pin(apic, pin) { @@ -1939,7 +1946,7 @@ void disable_IO_APIC(void) */ clear_IO_APIC(); - if (!legacy_pic->nr_legacy_irqs) + if (!nr_legacy_irqs()) return; x86_io_apic_ops.disable(); @@ -2143,7 +2150,7 @@ static unsigned int startup_ioapic_irq(struct irq_data *data) unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - if (irq < legacy_pic->nr_legacy_irqs) { + if (irq < nr_legacy_irqs()) { legacy_pic->mask(irq); if (legacy_pic->irq_pending(irq)) was_pending = 1; @@ -2542,7 +2549,7 @@ static inline void init_IO_APIC_traps(void) * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (irq < legacy_pic->nr_legacy_irqs) + if (irq < nr_legacy_irqs()) legacy_pic->make_irq(irq); else /* Strange. Oh, well.. */ @@ -2839,7 +2846,7 @@ void __init setup_IO_APIC(void) /* * calling enable_IO_APIC() is moved to setup_local_APIC for BP */ - io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL; + io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); /* @@ -2850,7 +2857,7 @@ void __init setup_IO_APIC(void) sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); - if (legacy_pic->nr_legacy_irqs) + if (nr_legacy_irqs()) check_timer(); } @@ -3348,7 +3355,7 @@ static int __init io_apic_get_redir_entries(int ioapic) unsigned int arch_dynirq_lower_bound(unsigned int from) { - unsigned int min = gsi_top + NR_IRQS_LEGACY; + unsigned int min = gsi_top + nr_legacy_irqs(); return from < min ? 
min : from; } @@ -3360,17 +3367,17 @@ int __init arch_probe_nr_irqs(void) if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) nr_irqs = NR_VECTORS * nr_cpu_ids; - nr = (gsi_top + NR_IRQS_LEGACY) + 8 * nr_cpu_ids; + nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; #if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) /* * for MSI and HT dyn irq */ - nr += (gsi_top + NR_IRQS_LEGACY) * 16; + nr += gsi_top * 16; #endif if (nr < nr_irqs) nr_irqs = nr; - return NR_IRQS_LEGACY; + return nr_legacy_irqs(); } int io_apic_set_pci_routing(struct device *dev, int irq, diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 7db54b5d5f86..b4680058db8b 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -21,6 +21,7 @@ #include #include #include +#include __initdata u64 initial_dtb; char __initdata cmd_line[COMMAND_LINE_SIZE]; @@ -314,7 +315,7 @@ static void dt_add_ioapic_domain(unsigned int ioapic_num, struct irq_domain *id; struct mp_ioapic_gsi *gsi_cfg; int ret; - int num; + int num, legacy_irqs = nr_legacy_irqs(); gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; @@ -324,17 +325,17 @@ static void dt_add_ioapic_domain(unsigned int ioapic_num, BUG_ON(!id); if (gsi_cfg->gsi_base == 0) { /* - * The first NR_IRQS_LEGACY irq descs are allocated in + * The first nr_legacy_irqs() irq descs are allocated in * early_irq_init() and need just a mapping. The * remaining irqs need both. All of them are preallocated * and assigned so we can keep the 1:1 mapping which the ioapic * is having. */ - irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); + irq_domain_associate_many(id, 0, 0, legacy_irqs); - if (num > NR_IRQS_LEGACY) { - ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, - NR_IRQS_LEGACY, num - NR_IRQS_LEGACY); + if (num > legacy_irqs) { + ret = irq_create_strict_mappings(id, legacy_irqs, + legacy_irqs, num - legacy_irqs); if (ret) pr_err("Error creating mapping for the " "remaining IRQs: %d\n", ret); diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7f50156542fb..a0111e91eb4b 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -78,7 +78,7 @@ void __init init_ISA_irqs(void) #endif legacy_pic->init(0); - for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) + for (i = 0; i < nr_legacy_irqs(); i++) irq_set_chip_and_handler_name(i, chip, handle_level_irq, name); } @@ -100,7 +100,7 @@ void __init init_IRQ(void) * then this vector space can be freed and re-used dynamically as the * irq's migrate etc. 
*/ - for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) + for (i = 0; i < nr_legacy_irqs(); i++) per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; x86_init.irqs.intr_init(); @@ -121,7 +121,7 @@ void setup_vector_irq(int cpu) * legacy PIC, for the new cpu that is coming online, setup the static * legacy vector to irq mapping: */ - for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++) + for (irq = 0; irq < nr_legacy_irqs(); irq++) per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; #endif diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 905956f16465..093f5f4272d3 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -23,6 +23,7 @@ #include #include #include +#include static int xen_pcifront_enable_irq(struct pci_dev *dev) { @@ -40,7 +41,7 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev) /* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/ pirq = gsi; - if (gsi < NR_IRQS_LEGACY) + if (gsi < nr_legacy_irqs()) share = 0; rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront"); @@ -511,7 +512,7 @@ int __init pci_xen_initial_domain(void) xen_setup_acpi_sci(); __acpi_register_gsi = acpi_register_gsi_xen; /* Pre-allocate legacy irqs */ - for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { + for (irq = 0; irq < nr_legacy_irqs(); irq++) { int trigger, polarity; if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) @@ -522,7 +523,7 @@ int __init pci_xen_initial_domain(void) true /* Map GSI to PIRQ */); } if (0 == nr_ioapics) { - for (irq = 0; irq < NR_IRQS_LEGACY; irq++) + for (irq = 0; irq < nr_legacy_irqs(); irq++) xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic"); } return 0; -- cgit v1.2.2 From 4b92b4f754939e4ac6bb53355abbe48a5054b573 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:49 +0800 Subject: x86, irq: Simplify arch_early_irq_init() Simplify function arch_early_irq_init() and kill static array irq_cfgx[]. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-21-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4208ea9edc10..e25e7e315d4f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -183,6 +183,7 @@ early_param("noapic", parse_noapic); static int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr); +static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ void mp_save_irq(struct mpc_intsrc *m) @@ -214,14 +215,10 @@ static struct irq_pin_list *alloc_irq_pin_list(int node) return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node); } - -/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. 
*/ -static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; - int __init arch_early_irq_init(void) { struct irq_cfg *cfg; - int count, node, i; + int i, node = cpu_to_node(0); if (!nr_legacy_irqs()) io_apic_irqs = ~0UL; @@ -234,22 +231,14 @@ int __init arch_early_irq_init(void) pr_err("IOAPIC %d: suspend/resume impossible!\n", i); } - cfg = irq_cfgx; - count = ARRAY_SIZE(irq_cfgx); - node = cpu_to_node(0); - - for (i = 0; i < count; i++) { - irq_set_chip_data(i, &cfg[i]); - zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node); - zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node); - /* - * For legacy IRQ's, start with assigning irq0 to irq15 to - * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. - */ - if (i < nr_legacy_irqs()) { - cfg[i].vector = IRQ0_VECTOR + i; - cpumask_setall(cfg[i].domain); - } + /* + * For legacy IRQ's, start with assigning irq0 to irq15 to + * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. + */ + for (i = 0; i < nr_legacy_irqs(); i++) { + cfg = alloc_irq_and_cfg_at(i, node); + cfg->vector = IRQ0_VECTOR + i; + cpumask_setall(cfg->domain); } return 0; @@ -3377,7 +3366,7 @@ int __init arch_probe_nr_irqs(void) if (nr < nr_irqs) nr_irqs = nr; - return nr_legacy_irqs(); + return 0; } int io_apic_set_pci_routing(struct device *dev, int irq, -- cgit v1.2.2 From 6b9fb7082409cd4a2c7caf43e3c023ad82dad0d4 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 10 Jun 2014 14:13:25 +0800 Subject: x86, ACPI, irq: Consolidate algorithm of mapping (ioapic, pin) to IRQ number Currently ACPI and ioapic both implement algorithms to map (ioapic, pin) to IRQ number. So consolidate the common part into one place, which is also preparing for irqdomain support. It introduces mp_map_gsi_to_irq(), which will be used to allocate IRQ number IOAPIC pins when irqdomain is enabled. Also rename gsi_to_irq() to map_gsi_to_irq(), later we will introduce unmap_gsi_to_irq() when enabling IOAPIC hotplug. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Link: http://lkml.kernel.org/r/1402380812-32446-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 2 ++ arch/x86/kernel/acpi/boot.c | 45 ++++++++++++++++++++---------------------- arch/x86/kernel/apic/io_apic.c | 27 ++++++++++++++----------- 3 files changed, 38 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index b775cf3622c3..978e51fdcb59 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -172,6 +172,7 @@ extern u32 gsi_top; extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); extern u32 mp_pin_to_gsi(int ioapic, int pin); +extern int mp_map_gsi_to_irq(u32 gsi); extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); extern void __init pre_init_apic_IRQ0(void); @@ -214,6 +215,7 @@ static inline void ioapic_insert_resources(void) { } #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } +static inline int mp_map_gsi_to_irq(u32 gsi) { return gsi; } struct io_apic_irq_attr; static inline int io_apic_set_pci_routing(struct device *dev, int irq, diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b12976590a72..9965afbd71ca 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -100,27 +100,15 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { #define ACPI_INVALID_GSI INT_MIN -static unsigned int gsi_to_irq(unsigned int gsi) +static int map_gsi_to_irq(unsigned int gsi) { - unsigned int irq = gsi + nr_legacy_irqs(); - unsigned int i; + int i; - for (i = 0; i < nr_legacy_irqs(); i++) { - if (isa_irq_to_gsi[i] == gsi) { + for (i = 0; i < nr_legacy_irqs(); i++) + if (isa_irq_to_gsi[i] == gsi) return i; - } - } - - /* Provide an identity mapping of gsi == irq - * except on truly weird platforms that have - * non isa irqs in the first 16 gsis. - */ - if (gsi >= nr_legacy_irqs()) - irq = gsi; - else - irq = gsi_top + gsi; - return irq; + return mp_map_gsi_to_irq(gsi); } /* @@ -416,6 +404,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) { + int irq; int ioapic; int ioapic_pin; struct io_apic_irq_attr irq_attr; @@ -428,6 +417,10 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, if (acpi_gbl_FADT.sci_interrupt == gsi) return gsi; + irq = map_gsi_to_irq(gsi); + if (irq < 0) + return ACPI_INVALID_GSI; + ioapic = mp_find_ioapic(gsi); if (ioapic < 0) { printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); @@ -449,7 +442,7 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, polarity == ACPI_ACTIVE_HIGH ? 
0 : 1); - ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); + ret = io_apic_set_pci_routing(dev, irq, &irq_attr); if (ret < 0) gsi = ACPI_INVALID_GSI; @@ -614,16 +607,20 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) outb(new >> 8, 0x4d1); } -int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) +int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) { - *irq = gsi_to_irq(gsi); + int irq = map_gsi_to_irq(gsi); + if (irq >= 0) { #ifdef CONFIG_X86_IO_APIC - if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) - setup_IO_APIC_irq_extra(gsi); + if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) + setup_IO_APIC_irq_extra(gsi); #endif + *irqp = irq; + return 0; + } - return 0; + return -1; } EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); @@ -681,7 +678,7 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) plat_gsi = __acpi_register_gsi(dev, gsi, trigger, polarity); if (plat_gsi != ACPI_INVALID_GSI) - return gsi_to_irq(plat_gsi); + return map_gsi_to_irq(plat_gsi); return -1; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e25e7e315d4f..7fd9f1befe0b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -959,11 +959,19 @@ static int irq_trigger(int idx) return trigger; } +int mp_map_gsi_to_irq(u32 gsi) +{ + /* + * Provide an identity mapping of gsi == irq except on truly weird + * platforms that have non isa irqs in the first 16 gsis. + */ + return gsi >= nr_legacy_irqs() ? gsi : gsi_top + gsi; +} + static int pin_2_irq(int idx, int apic, int pin) { int irq; int bus = mp_irqs[idx].srcbus; - struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic); /* * Debugging check, we are in big trouble if this message pops up! @@ -971,17 +979,6 @@ static int pin_2_irq(int idx, int apic, int pin) if (mp_irqs[idx].dstirq != pin) pr_err("broken BIOS or MPTABLE parser, ayiee!!\n"); - if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].srcbusirq; - } else { - u32 gsi = gsi_cfg->gsi_base + pin; - - if (gsi >= nr_legacy_irqs()) - irq = gsi; - else - irq = gsi_top + gsi; - } - #ifdef CONFIG_X86_32 /* * PCI IRQ command line redirection. Yes, limits are hardcoded. @@ -996,11 +993,17 @@ static int pin_2_irq(int idx, int apic, int pin) apic_printk(APIC_VERBOSE, KERN_DEBUG "using PIRQ%d -> IRQ %d\n", pin-16, irq); + return irq; } } } #endif + if (test_bit(bus, mp_bus_not_pci)) + irq = mp_irqs[idx].srcbusirq; + else + irq = mp_map_gsi_to_irq(mp_pin_to_gsi(apic, pin)); + return irq; } -- cgit v1.2.2 From 84245af7297ced9e8fe837dc0fc4782438771bd2 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 10 Jun 2014 14:14:47 +0800 Subject: x86, irq, ACPI: Change __acpi_register_gsi to return IRQ number instead of GSI Currently __acpi_register_gsi is defined to return GSI number and may be set to acpi_register_gsi_pic(), acpi_register_gsi_ioapic(), acpi_register_gsi_xen_hvm() and acpi_register_gsi_xen(). Among which, acpi_register_gsi_ioapic() returns GSI number, but acpi_register_gsi_xen_hvm() and acpi_register_gsi_xen() actually returns IRQ number instead of GSI. And for acpi_register_gsi_pic(), GSI number equals to IRQ number. So change acpi_register_gsi_ioapic() to return IRQ number, it also simplifies the code. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Link: http://lkml.kernel.org/r/1402380887-32512-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 9965afbd71ca..0cf311c72bce 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -419,7 +419,7 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, irq = map_gsi_to_irq(gsi); if (irq < 0) - return ACPI_INVALID_GSI; + return irq; ioapic = mp_find_ioapic(gsi); if (ioapic < 0) { @@ -444,12 +444,11 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, polarity == ACPI_ACTIVE_HIGH ? 0 : 1); ret = io_apic_set_pci_routing(dev, irq, &irq_attr); if (ret < 0) - gsi = ACPI_INVALID_GSI; + irq = -1; - return gsi; + return irq; } - static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { @@ -652,11 +651,13 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi, static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, int trigger, int polarity) { + int irq = gsi; + #ifdef CONFIG_X86_IO_APIC - gsi = mp_register_gsi(dev, gsi, trigger, polarity); + irq = mp_register_gsi(dev, gsi, trigger, polarity); #endif - return gsi; + return irq; } int (*__acpi_register_gsi)(struct device *dev, u32 gsi, @@ -674,13 +675,7 @@ int (*acpi_suspend_lowlevel)(void); */ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) { - unsigned int plat_gsi; - - plat_gsi = __acpi_register_gsi(dev, gsi, trigger, polarity); - if (plat_gsi != ACPI_INVALID_GSI) - return map_gsi_to_irq(plat_gsi); - - return -1; + return __acpi_register_gsi(dev, gsi, trigger, polarity); } EXPORT_SYMBOL_GPL(acpi_register_gsi); -- cgit v1.2.2 From d7f3d4781852f5160b939f526afbc21a813a0206 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:52 +0800 Subject: x86, irq: Introduce mechanisms to support dynamically allocate IRQ for IOAPIC Currently x86 supports an identity mapping between GSI (IOAPIC pin) and IRQ number, so contiguous IRQs at the low end are statically allocated to IOAPICs at boot time. This design causes trouble when supporting IOAPIC hotplug. This patch implements a basic mechanism to dynamically allocate IRQs on demand for IOAPIC pins by using the irqdomain framework. It first adds several fields into struct ioapic to support irqdomain. Then it implements an algorithm to dynamically allocate IRQ numbers for IOAPIC pins on demand. Currently it supports three types of irqdomain: 1) LEGACY: used to support IOAPICs hosting legacy IRQs and to build an identity mapping for legacy IRQs. As a special case, we dynamically allocate an IRQ number for an IOAPIC pin whose GSI number is below nr_legacy_irqs() but which isn't a legacy IRQ. This is for backward compatibility and to avoid regressions. 2) STRICT: builds an identity mapping between GSI and IRQ number. 3) DYNAMIC: dynamically allocates an IRQ number for an IOAPIC pin on demand. Legacy (ISA) IRQs are not managed by irqdomain because there may be multiple pins sharing the same IRQ number, and the current irqdomain only supports a 1:1 mapping between pins and IRQs. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Link: http://lkml.kernel.org/r/1402302011-23642-24-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 1 + arch/x86/include/asm/io_apic.h | 21 +++++- arch/x86/kernel/acpi/boot.c | 8 +-- arch/x86/kernel/apic/io_apic.c | 158 ++++++++++++++++++++++++++++++++--------- 4 files changed, 147 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9df5c453171e..147a7b788100 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -836,6 +836,7 @@ config X86_IO_APIC def_bool y depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ + select IRQ_DOMAIN config X86_REROUTE_FOR_BROKEN_BOOT_IRQS bool "Reroute for broken boot IRQs" diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 978e51fdcb59..64c6e344399b 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -98,6 +98,8 @@ struct IR_IO_APIC_route_entry { #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 #define IOAPIC_LEVEL 1 +#define IOAPIC_MAP_ALLOC 0x1 +#define IOAPIC_MAP_CHECK 0x2 #ifdef CONFIG_X86_IO_APIC @@ -163,6 +165,21 @@ extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); extern void setup_ioapic_ids_from_mpc_nocheck(void); +enum ioapic_domain_type { + IOAPIC_DOMAIN_INVALID, + IOAPIC_DOMAIN_LEGACY, + IOAPIC_DOMAIN_STRICT, + IOAPIC_DOMAIN_DYNAMIC, +}; + +struct device_node; +struct irq_domain_ops; +struct ioapic_domain_cfg { + enum ioapic_domain_type type; + const struct irq_domain_ops *ops; + struct device_node *dev; +}; + struct mp_ioapic_gsi{ u32 gsi_base; u32 gsi_end; @@ -172,7 +189,7 @@ extern u32 gsi_top; extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); extern u32 mp_pin_to_gsi(int ioapic, int pin); -extern int mp_map_gsi_to_irq(u32 gsi); +extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags); extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); extern void __init pre_init_apic_IRQ0(void); @@ -215,7 +232,7 @@ static inline void ioapic_insert_resources(void) { } #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } -static inline int mp_map_gsi_to_irq(u32 gsi) { return gsi; } +static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; } struct io_apic_irq_attr; static inline int io_apic_set_pci_routing(struct device *dev, int irq, diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 0cf311c72bce..d6635baf9e3d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -100,7 +100,7 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { #define ACPI_INVALID_GSI INT_MIN -static int map_gsi_to_irq(unsigned int gsi) +static int map_gsi_to_irq(unsigned int gsi, unsigned int flags) { int i; @@ -108,7 +108,7 @@ static int map_gsi_to_irq(unsigned int gsi) if (isa_irq_to_gsi[i] == gsi) return i; - return mp_map_gsi_to_irq(gsi); + return mp_map_gsi_to_irq(gsi, flags); } /* @@ -417,7 +417,7 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, if (acpi_gbl_FADT.sci_interrupt == gsi) return gsi; - irq = map_gsi_to_irq(gsi); + irq = map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC); if (irq < 0) return irq; @@ -608,7 +608,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int 
irq, u16 trigger) int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) { - int irq = map_gsi_to_irq(gsi); + int irq = map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); if (irq >= 0) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7fd9f1befe0b..51ce80004a78 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -83,6 +84,7 @@ int sis_apic_bug = -1; static DEFINE_RAW_SPINLOCK(ioapic_lock); static DEFINE_RAW_SPINLOCK(vector_lock); +static DEFINE_MUTEX(ioapic_mutex); static struct ioapic { /* @@ -97,6 +99,8 @@ static struct ioapic { struct mpc_ioapic mp_config; /* IO APIC gsi routing info */ struct mp_ioapic_gsi gsi_config; + struct ioapic_domain_cfg irqdomain_cfg; + struct irq_domain *irqdomain; DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); } ioapics[MAX_IO_APICS]; @@ -142,6 +146,11 @@ static inline int mp_init_irq_at_boot(int ioapic, int irq) return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs()); } +static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic) +{ + return ioapics[ioapic].irqdomain; +} + int nr_ioapics; /* The one past the highest gsi number used */ @@ -959,19 +968,79 @@ static int irq_trigger(int idx) return trigger; } -int mp_map_gsi_to_irq(u32 gsi) +static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin) { + int irq = -1; + int ioapic = (int)(long)domain->host_data; + int type = ioapics[ioapic].irqdomain_cfg.type; + + switch (type) { + case IOAPIC_DOMAIN_LEGACY: + /* + * Dynamically allocate IRQ number for non-ISA IRQs in the first 16 + * GSIs on some weird platforms. + */ + if (gsi < nr_legacy_irqs()) + irq = irq_create_mapping(domain, pin); + else if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0) + irq = gsi; + break; + case IOAPIC_DOMAIN_STRICT: + if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0) + irq = gsi; + break; + case IOAPIC_DOMAIN_DYNAMIC: + irq = irq_create_mapping(domain, pin); + break; + default: + WARN(1, "ioapic: unknown irqdomain type %d\n", type); + break; + } + + return irq > 0 ? irq : -1; +} + +static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, + unsigned int flags) +{ + int irq; + struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); + /* - * Provide an identity mapping of gsi == irq except on truly weird - * platforms that have non isa irqs in the first 16 gsis. + * Don't use irqdomain to manage ISA IRQs because there may be + * multiple IOAPIC pins sharing the same ISA IRQ number and + * irqdomain only supports 1:1 mapping between IOAPIC pin and + * IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are used + * for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H). + * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are + * available, and some BIOSes may use MP Interrupt Source records + * to override IRQ numbers for PIRQs instead of reprogramming + * the interrupt routing logic. Thus there may be multiple pins + * sharing the same legacy IRQ number when ACPI is disabled. */ - return gsi >= nr_legacy_irqs() ? gsi : gsi_top + gsi; + if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) + return mp_irqs[idx].srcbusirq; + + if (!domain) { + /* + * Provide an identity mapping of gsi == irq except on truly + * weird platforms that have non isa irqs in the first 16 gsis. + */ + return gsi >= nr_legacy_irqs() ? 
gsi : gsi_top + gsi; + } + + mutex_lock(&ioapic_mutex); + irq = irq_find_mapping(domain, pin); + if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC)) + irq = alloc_irq_from_domain(domain, gsi, pin); + mutex_unlock(&ioapic_mutex); + + return irq > 0 ? irq : -1; } -static int pin_2_irq(int idx, int apic, int pin) +static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags) { - int irq; - int bus = mp_irqs[idx].srcbus; + u32 gsi = mp_pin_to_gsi(ioapic, pin); /* * Debugging check, we are in big trouble if this message pops up! @@ -989,7 +1058,7 @@ static int pin_2_irq(int idx, int apic, int pin) apic_printk(APIC_VERBOSE, KERN_DEBUG "disabling PIRQ%d\n", pin-16); } else { - irq = pirq_entries[pin-16]; + int irq = pirq_entries[pin-16]; apic_printk(APIC_VERBOSE, KERN_DEBUG "using PIRQ%d -> IRQ %d\n", pin-16, irq); @@ -999,12 +1068,23 @@ static int pin_2_irq(int idx, int apic, int pin) } #endif - if (test_bit(bus, mp_bus_not_pci)) - irq = mp_irqs[idx].srcbusirq; - else - irq = mp_map_gsi_to_irq(mp_pin_to_gsi(apic, pin)); + return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags); +} - return irq; +int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) +{ + int ioapic, pin, idx; + + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return -1; + + pin = mp_find_ioapic_pin(ioapic, gsi); + idx = find_irq_entry(ioapic, pin, mp_INT); + if ((flags & IOAPIC_MAP_CHECK) && idx < 0) + return -1; + + return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags); } /* @@ -1014,7 +1094,7 @@ static int pin_2_irq(int idx, int apic, int pin) int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, struct io_apic_irq_attr *irq_attr) { - int irq, i, best_guess = -1; + int irq, i, best_ioapic = -1, best_idx = -1; apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", @@ -1043,30 +1123,37 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, continue; /* Skip ISA IRQs */ - irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq); - if (ioapic_idx == 0 && !IO_APIC_IRQ(irq)) + irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq, 0); + if (irq > 0 && !IO_APIC_IRQ(irq)) continue; if (pin == (mp_irqs[i].srcbusirq & 3)) { - set_io_apic_irq_attr(irq_attr, ioapic_idx, - mp_irqs[i].dstirq, - irq_trigger(i), - irq_polarity(i)); - return irq; + best_idx = i; + best_ioapic = ioapic_idx; + goto out; } + /* * Use the first all-but-pin matching entry as a * best-guess fuzzy result for broken mptables. 
*/ - if (best_guess < 0) { - set_io_apic_irq_attr(irq_attr, ioapic_idx, - mp_irqs[i].dstirq, - irq_trigger(i), - irq_polarity(i)); - best_guess = irq; + if (best_idx < 0) { + best_idx = i; + best_ioapic = ioapic_idx; } } - return best_guess; + if (best_idx < 0) + return -1; + +out: + irq = pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, + IOAPIC_MAP_ALLOC); + if (irq > 0) + set_io_apic_irq_attr(irq_attr, best_ioapic, + mp_irqs[best_idx].dstirq, + irq_trigger(best_idx), + irq_polarity(best_idx)); + return irq; } EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); @@ -1257,7 +1344,7 @@ static inline int IO_APIC_irq_trigger(int irq) for_each_ioapic_pin(apic, pin) { idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin, 0))) return irq_trigger(idx); } /* @@ -1383,8 +1470,9 @@ static void __init __io_apic_setup_irqs(unsigned int ioapic_idx) if (io_apic_pin_not_connected(idx, ioapic_idx, pin)) continue; - irq = pin_2_irq(idx, ioapic_idx, pin); - if (!mp_init_irq_at_boot(ioapic_idx, irq)) + irq = pin_2_irq(idx, ioapic_idx, pin, + ioapic_idx ? 0 : IOAPIC_MAP_ALLOC); + if (irq < 0 || !mp_init_irq_at_boot(ioapic_idx, irq)) continue; /* @@ -1434,8 +1522,8 @@ void setup_IO_APIC_irq_extra(u32 gsi) if (idx == -1) return; - irq = pin_2_irq(idx, ioapic_idx, pin); - if (mp_init_irq_at_boot(ioapic_idx, irq)) + irq = pin_2_irq(idx, ioapic_idx, pin, IOAPIC_MAP_ALLOC); + if (irq < 0 || mp_init_irq_at_boot(ioapic_idx, irq)) return; set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), @@ -3543,8 +3631,8 @@ void __init setup_ioapic_dest(void) if (irq_entry == -1) continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - if (!mp_init_irq_at_boot(ioapic, irq)) + irq = pin_2_irq(irq_entry, ioapic, pin, 0); + if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq)) continue; idata = irq_get_irq_data(irq); -- cgit v1.2.2 From 44767bfaaed782d6d635ecbb13f3980041e6f33e Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:53 +0800 Subject: x86, irq: Enhance mp_register_ioapic() to support irqdomain Enhance function mp_register_ioapic() to support irqdomain. When registering IOAPIC, caller may provide callbacks and parameters for creating irqdomain. The IOAPIC core will create irqdomain later if caller has passed in corresponding parameters. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: sfi-devel@simplefirmware.org Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Cc: Rob Herring Cc: Michal Simek Cc: Tony Lindgren Link: http://lkml.kernel.org/r/1402302011-23642-25-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 3 ++- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/apic/io_apic.c | 42 +++++++++++++++++++++++++++++++++++++++++- arch/x86/kernel/devicetree.c | 2 +- arch/x86/kernel/mpparse.c | 2 +- arch/x86/platform/sfi/sfi.c | 2 +- 6 files changed, 47 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 64c6e344399b..3e4bea3a52b1 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -190,7 +190,8 @@ extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); extern u32 mp_pin_to_gsi(int ioapic, int pin); extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags); -extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); +extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, + struct ioapic_domain_cfg *cfg); extern void __init pre_init_apic_IRQ0(void); extern void mp_save_irq(struct mpc_intsrc *m); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d6635baf9e3d..f86b4bae4640 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -462,7 +462,7 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) acpi_table_print_madt_entry(header); mp_register_ioapic(ioapic->id, - ioapic->address, ioapic->global_irq_base); + ioapic->address, ioapic->global_irq_base, NULL); return 0; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 51ce80004a78..563f4504f54d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -85,6 +85,7 @@ int sis_apic_bug = -1; static DEFINE_RAW_SPINLOCK(ioapic_lock); static DEFINE_RAW_SPINLOCK(vector_lock); static DEFINE_MUTEX(ioapic_mutex); +static unsigned int ioapic_dynirq_base; static struct ioapic { /* @@ -2920,8 +2921,35 @@ out: */ #define PIC_IRQS (1UL << PIC_CASCADE_IR) +static int mp_irqdomain_create(int ioapic) +{ + int hwirqs = mp_ioapic_pin_count(ioapic); + struct ioapic *ip = &ioapics[ioapic]; + struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; + struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); + + if (cfg->type == IOAPIC_DOMAIN_INVALID) + return 0; + + ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops, + (void *)(long)ioapic); + if(!ip->irqdomain) + return -ENOMEM; + + if (cfg->type == IOAPIC_DOMAIN_LEGACY || + cfg->type == IOAPIC_DOMAIN_STRICT) + ioapic_dynirq_base = max(ioapic_dynirq_base, + gsi_cfg->gsi_end + 1); + + if (gsi_cfg->gsi_base == 0) + irq_set_default_host(ip->irqdomain); + + return 0; +} + void __init setup_IO_APIC(void) { + int ioapic; /* * calling enable_IO_APIC() is moved to setup_local_APIC for BP @@ -2929,6 +2957,9 @@ void __init setup_IO_APIC(void) io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); + for_each_ioapic(ioapic) + BUG_ON(mp_irqdomain_create(ioapic)); + /* * Set up IO-APIC IRQ routing. */ @@ -3437,6 +3468,9 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) { unsigned int min = gsi_top + nr_legacy_irqs(); + if (ioapic_dynirq_base) + return ioapic_dynirq_base; + return from < min ? 
min : from; } @@ -3812,7 +3846,8 @@ static __init int bad_ioapic_register(int idx) return 0; } -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, + struct ioapic_domain_cfg *cfg) { int idx = 0; int entries; @@ -3826,6 +3861,11 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) ioapics[idx].mp_config.type = MP_IOAPIC; ioapics[idx].mp_config.flags = MPC_APIC_USABLE; ioapics[idx].mp_config.apicaddr = address; + ioapics[idx].irqdomain = NULL; + if (cfg) + ioapics[idx].irqdomain_cfg = *cfg; + else + ioapics[idx].irqdomain_cfg.type = IOAPIC_DOMAIN_INVALID; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index b4680058db8b..d2c53feacd77 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -177,7 +177,7 @@ static void __init dtb_add_ioapic(struct device_node *dn) dn->full_name); return; } - mp_register_ioapic(++ioapic_id, r.start, gsi_top); + mp_register_ioapic(++ioapic_id, r.start, gsi_top, NULL); } static void __init dtb_ioapic_setup(void) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b10e1132f316..ea8595e4cbd5 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -115,7 +115,7 @@ static void __init MP_bus_info(struct mpc_bus *m) static void __init MP_ioapic_info(struct mpc_ioapic *m) { if (m->flags & MPC_APIC_USABLE) - mp_register_ioapic(m->apicid, m->apicaddr, gsi_top); + mp_register_ioapic(m->apicid, m->apicaddr, gsi_top, NULL); } static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c index bcd1a703e3e6..8f2f789c6f04 100644 --- a/arch/x86/platform/sfi/sfi.c +++ b/arch/x86/platform/sfi/sfi.c @@ -82,7 +82,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table) pentry = (struct sfi_apic_table_entry *)sb->pentry; for (i = 0; i < num; i++) { - mp_register_ioapic(i, pentry->phys_addr, gsi_top); + mp_register_ioapic(i, pentry->phys_addr, gsi_top, NULL); pentry++; } -- cgit v1.2.2 From ca7e28aa4ff34fdd6622d915682b2765453c5ddd Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:54 +0800 Subject: x86, ACPI, irq: Provide basic irqdomain support Enhance ACPI driver to provide basic irqdomain support for IOAPIC. We will build identity mapping for IOAPICs hosting legacy IRQs, otherwise dynamically allocate IRQ numbers for IOAPIC pins on demand. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Link: http://lkml.kernel.org/r/1402302011-23642-26-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f86b4bae4640..8d9aee1e67e8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -449,10 +450,16 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, return irq; } +static struct irq_domain_ops acpi_irqdomain_ops; + static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { struct acpi_madt_io_apic *ioapic = NULL; + struct ioapic_domain_cfg cfg = { + .type = IOAPIC_DOMAIN_DYNAMIC, + .ops = &acpi_irqdomain_ops, + }; ioapic = (struct acpi_madt_io_apic *)header; @@ -461,8 +468,12 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) acpi_table_print_madt_entry(header); - mp_register_ioapic(ioapic->id, - ioapic->address, ioapic->global_irq_base, NULL); + /* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */ + if (ioapic->global_irq_base < nr_legacy_irqs()) + cfg.type = IOAPIC_DOMAIN_LEGACY; + + mp_register_ioapic(ioapic->id, ioapic->address, ioapic->global_irq_base, + &cfg); return 0; } -- cgit v1.2.2 From 74501edcd846830bec86bfa06c47c25083e70ffc Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:55 +0800 Subject: x86, mpparse, irq: Provide basic irqdomain support Enhance mpparse to provide basic support of irqdomain. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-27-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mpparse.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index ea8595e4cbd5..13c8e1f864fc 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -112,10 +113,17 @@ static void __init MP_bus_info(struct mpc_bus *m) pr_warn("Unknown bustype %s - ignoring\n", str); } +static struct irq_domain_ops mp_ioapic_irqdomain_ops; + static void __init MP_ioapic_info(struct mpc_ioapic *m) { + struct ioapic_domain_cfg cfg = { + .type = IOAPIC_DOMAIN_LEGACY, + .ops = &mp_ioapic_irqdomain_ops, + }; + if (m->flags & MPC_APIC_USABLE) - mp_register_ioapic(m->apicid, m->apicaddr, gsi_top, NULL); + mp_register_ioapic(m->apicid, m->apicaddr, gsi_top, &cfg); } static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) -- cgit v1.2.2 From 1b5d3e00d45e093fa0551c588034c3355b362f66 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:56 +0800 Subject: x86, SFI, irq: Provide basic irqdomain support Enhance SFI to provide basic support of irqdomain with identity mapping between GSIs and IRQs. Some Intel MID platforms assumes identity mapping between GSI and IRQ, so we can't dynamically allocate IRQ number on demand. 
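A minimal sketch of that identity-mapped setup, using only the interfaces added earlier in this series; sfi_register_strict_ioapic() and sfi_gsi_to_irq() are hypothetical helpers shown purely for illustration, and the actual wiring is in the diff below:

static struct irq_domain_ops sfi_ioapic_irqdomain_ops;

/* Hypothetical helper: register one IOAPIC whose irqdomain keeps the
 * identity GSI == IRQ mapping (IOAPIC_DOMAIN_STRICT), as these Intel MID
 * platforms expect.
 */
static void __init sfi_register_strict_ioapic(int id, u32 address)
{
	struct ioapic_domain_cfg cfg = {
		.type = IOAPIC_DOMAIN_STRICT,
		.ops  = &sfi_ioapic_irqdomain_ops,
	};

	mp_register_ioapic(id, address, gsi_top, &cfg);
}

/* Hypothetical helper: with a STRICT domain, alloc_irq_from_domain() goes
 * through irq_create_strict_mappings(), so the IRQ returned here equals
 * the GSI.
 */
static int sfi_gsi_to_irq(u32 gsi)
{
	return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
}
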
Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: sfi-devel@simplefirmware.org Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: David Cohen Cc: Kuppuswamy Sathyanarayanan Link: http://lkml.kernel.org/r/1402302011-23642-28-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/intel_mid_pci.c | 3 +++ arch/x86/platform/intel-mid/sfi.c | 2 ++ arch/x86/platform/sfi/sfi.c | 8 +++++++- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 84b9d672843d..fcbdc5fac2c6 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -217,6 +217,9 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to * IOAPIC RTE entries, so we just enable RTE for the device. */ + if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0) + return -EBUSY; + irq_attr.ioapic = mp_find_ioapic(dev->irq); irq_attr.ioapic_pin = dev->irq; irq_attr.trigger = 1; /* level */ diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 994c40bd7cb7..7161395e7de7 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -473,6 +473,8 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) /* PNW and CLV go with active low */ irq_attr.polarity = 1; } + WARN_ON(mp_map_gsi_to_irq(irq, + IOAPIC_MAP_ALLOC) < 0); io_apic_set_pci_routing(NULL, irq, &irq_attr); } } else { diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c index 8f2f789c6f04..1fdaa57f41a5 100644 --- a/arch/x86/platform/sfi/sfi.c +++ b/arch/x86/platform/sfi/sfi.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -70,19 +71,24 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC +static struct irq_domain_ops sfi_ioapic_irqdomain_ops; static int __init sfi_parse_ioapic(struct sfi_table_header *table) { struct sfi_table_simple *sb; struct sfi_apic_table_entry *pentry; int i, num; + struct ioapic_domain_cfg cfg = { + .type = IOAPIC_DOMAIN_STRICT, + .ops = &sfi_ioapic_irqdomain_ops, + }; sb = (struct sfi_table_simple *)table; num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry); pentry = (struct sfi_apic_table_entry *)sb->pentry; for (i = 0; i < num; i++) { - mp_register_ioapic(i, pentry->phys_addr, gsi_top, NULL); + mp_register_ioapic(i, pentry->phys_addr, gsi_top, &cfg); pentry++; } -- cgit v1.2.2 From facd8fdb25fc4d041a283446cfb040cbfe2c3723 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:57 +0800 Subject: x86, devicetree, irq: Use common mechanism to support irqdomain Now the ioapic driver provides a common interface to create irqdomain, so replace the private implementation. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Rob Herring Cc: Michal Simek Cc: Tony Lindgren Link: http://lkml.kernel.org/r/1402302011-23642-29-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/prom.h | 2 - arch/x86/kernel/devicetree.c | 191 +++++++++++++------------------------------ arch/x86/kernel/irqinit.c | 6 -- 3 files changed, 59 insertions(+), 140 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index fbeb06ed0eaa..1d081ac1cd69 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -26,12 +26,10 @@ extern int of_ioapic; extern u64 initial_dtb; extern void add_dtb(u64 data); -extern void x86_add_irq_domains(void); void x86_of_pci_init(void); void x86_dtb_init(void); #else static inline void add_dtb(u64 data) { } -static inline void x86_add_irq_domains(void) { } static inline void x86_of_pci_init(void) { } static inline void x86_dtb_init(void) { } #define of_ioapic 0 diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index d2c53feacd77..ee26feca93d9 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -166,82 +166,6 @@ static void __init dtb_lapic_setup(void) #ifdef CONFIG_X86_IO_APIC static unsigned int ioapic_id; -static void __init dtb_add_ioapic(struct device_node *dn) -{ - struct resource r; - int ret; - - ret = of_address_to_resource(dn, 0, &r); - if (ret) { - printk(KERN_ERR "Can't obtain address from node %s.\n", - dn->full_name); - return; - } - mp_register_ioapic(++ioapic_id, r.start, gsi_top, NULL); -} - -static void __init dtb_ioapic_setup(void) -{ - struct device_node *dn; - - for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic") - dtb_add_ioapic(dn); - - if (nr_ioapics) { - of_ioapic = 1; - return; - } - printk(KERN_ERR "Error: No information about IO-APIC in OF.\n"); -} -#else -static void __init dtb_ioapic_setup(void) {} -#endif - -static void __init dtb_apic_setup(void) -{ - dtb_lapic_setup(); - dtb_ioapic_setup(); -} - -#ifdef CONFIG_OF_FLATTREE -static void __init x86_flattree_get_config(void) -{ - u32 size, map_len; - void *dt; - - if (!initial_dtb) - return; - - map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128); - - initial_boot_params = dt = early_memremap(initial_dtb, map_len); - size = of_get_flat_dt_size(); - if (map_len < size) { - early_iounmap(dt, map_len); - initial_boot_params = dt = early_memremap(initial_dtb, size); - map_len = size; - } - - unflatten_and_copy_device_tree(); - early_iounmap(dt, map_len); -} -#else -static inline void x86_flattree_get_config(void) { } -#endif - -void __init x86_dtb_init(void) -{ - x86_flattree_get_config(); - - if (!of_have_populated_dt()) - return; - - dtb_setup_hpet(); - dtb_apic_setup(); -} - -#ifdef CONFIG_X86_IO_APIC - struct of_ioapic_type { u32 out_type; u32 trigger; @@ -292,7 +216,7 @@ static int ioapic_xlate(struct irq_domain *domain, it = &of_ioapic_type[intspec[1]]; - idx = (u32) domain->host_data; + idx = (u32)(long)domain->host_data; set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line), @@ -309,78 +233,81 @@ const struct irq_domain_ops ioapic_irq_domain_ops = { .xlate = ioapic_xlate, }; -static void dt_add_ioapic_domain(unsigned int ioapic_num, - struct device_node *np) +static void __init dtb_add_ioapic(struct device_node *dn) { - struct irq_domain *id; - struct mp_ioapic_gsi *gsi_cfg; + struct resource r; int ret; - int 
num, legacy_irqs = nr_legacy_irqs(); - - gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); - num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; - - id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, - (void *)ioapic_num); - BUG_ON(!id); - if (gsi_cfg->gsi_base == 0) { - /* - * The first nr_legacy_irqs() irq descs are allocated in - * early_irq_init() and need just a mapping. The - * remaining irqs need both. All of them are preallocated - * and assigned so we can keep the 1:1 mapping which the ioapic - * is having. - */ - irq_domain_associate_many(id, 0, 0, legacy_irqs); - - if (num > legacy_irqs) { - ret = irq_create_strict_mappings(id, legacy_irqs, - legacy_irqs, num - legacy_irqs); - if (ret) - pr_err("Error creating mapping for the " - "remaining IRQs: %d\n", ret); - } - irq_set_default_host(id); - } else { - ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num); - if (ret) - pr_err("Error creating IRQ mapping: %d\n", ret); + struct ioapic_domain_cfg cfg = { + .type = IOAPIC_DOMAIN_DYNAMIC, + .ops = &ioapic_irq_domain_ops, + .dev = dn, + }; + + ret = of_address_to_resource(dn, 0, &r); + if (ret) { + printk(KERN_ERR "Can't obtain address from node %s.\n", + dn->full_name); + return; } + mp_register_ioapic(++ioapic_id, r.start, gsi_top, &cfg); } -static void __init ioapic_add_ofnode(struct device_node *np) +static void __init dtb_ioapic_setup(void) { - struct resource r; - int i, ret; + struct device_node *dn; - ret = of_address_to_resource(np, 0, &r); - if (ret) { - printk(KERN_ERR "Failed to obtain address for %s\n", - np->full_name); + for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic") + dtb_add_ioapic(dn); + + if (nr_ioapics) { + of_ioapic = 1; return; } + printk(KERN_ERR "Error: No information about IO-APIC in OF.\n"); +} +#else +static void __init dtb_ioapic_setup(void) {} +#endif - for (i = 0; i < nr_ioapics; i++) { - if (r.start == mpc_ioapic_addr(i)) { - dt_add_ioapic_domain(i, np); - return; - } - } - printk(KERN_ERR "IOxAPIC at %s is not registered.\n", np->full_name); +static void __init dtb_apic_setup(void) +{ + dtb_lapic_setup(); + dtb_ioapic_setup(); } -void __init x86_add_irq_domains(void) +#ifdef CONFIG_OF_FLATTREE +static void __init x86_flattree_get_config(void) { - struct device_node *dp; + u32 size, map_len; + void *dt; - if (!of_have_populated_dt()) + if (!initial_dtb) return; - for_each_node_with_property(dp, "interrupt-controller") { - if (of_device_is_compatible(dp, "intel,ce4100-ioapic")) - ioapic_add_ofnode(dp); + map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128); + + initial_boot_params = dt = early_memremap(initial_dtb, map_len); + size = of_get_flat_dt_size(); + if (map_len < size) { + early_iounmap(dt, map_len); + initial_boot_params = dt = early_memremap(initial_dtb, size); + map_len = size; } + + unflatten_and_copy_device_tree(); + early_iounmap(dt, map_len); } #else -void __init x86_add_irq_domains(void) { } +static inline void x86_flattree_get_config(void) { } #endif + +void __init x86_dtb_init(void) +{ + x86_flattree_get_config(); + + if (!of_have_populated_dt()) + return; + + dtb_setup_hpet(); + dtb_apic_setup(); +} diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index a0111e91eb4b..1e6cff5814fa 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -86,12 +86,6 @@ void __init init_IRQ(void) { int i; - /* - * We probably need a better place for this, but it works for - * now ... - */ - x86_add_irq_domains(); - /* * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. 
* If these IRQ's are handled by legacy interrupt-controllers like PIC, -- cgit v1.2.2 From 15a3c7cc9154321fc3ed1f7738bb7bbe690af91d Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:58 +0800 Subject: x86, irq: Introduce two helper functions to support irqdomain map operation Currently there are multiple entry points for programming IOAPIC pins, such as io_apic_setup_irq_pin_once(), io_apic_set_pci_routing() and setup_IO_APIC_irq_extra() etc. This patch introduces two functions to help consolidate the code that programs IOAPIC pins. Function mp_set_pin_attr() is used to optionally set the trigger, polarity and NUMA node properties for an IOAPIC pin. If mp_set_pin_attr() is not invoked for a pin, the default configuration from the BIOS will be used. Function mp_irqdomain_map() is a common implementation of the irqdomain map() operation. It figures out the attributes for a pin and then actually programs the IOAPIC pin. We hope this will be the only entry point for programming IOAPIC pins. The flow will be: 1) A caller such as xxx_pci_irq_enable figures out the pin attributes. 2) Invoke mp_set_pin_attr() to set attributes for a pin. If the pin has already been programmed, mp_set_pin_attr() will also detect attribute conflicts. 3) Invoke mp_map_pin_to_irq() 3.1) If an IRQ has already been assigned, return irq_find_mapping() 3.2) Else irq_create_mapping() ->irq_domain_associate() ->mp_irqdomain_map() ->io_apic_setup_irq_pin() So every pin will be programmed only once, by mp_irqdomain_map(), and we can kill io_apic_setup_irq_pin_once(), io_apic_set_pci_routing() and setup_IO_APIC_irq_extra() etc. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-30-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 5 +++ arch/x86/kernel/apic/io_apic.c | 99 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 103 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 3e4bea3a52b1..c53587868590 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -173,7 +173,9 @@ enum ioapic_domain_type { }; struct device_node; +struct irq_domain; struct irq_domain_ops; + struct ioapic_domain_cfg { enum ioapic_domain_type type; const struct irq_domain_ops *ops; @@ -192,6 +194,9 @@ extern u32 mp_pin_to_gsi(int ioapic, int pin); extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags); extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, struct ioapic_domain_cfg *cfg); +extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq); +extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node); extern void __init pre_init_apic_IRQ0(void); extern void mp_save_irq(struct mpc_intsrc *m); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 563f4504f54d..a602b35bcfe7 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -87,6 +87,14 @@ static DEFINE_RAW_SPINLOCK(vector_lock); static DEFINE_MUTEX(ioapic_mutex); static unsigned int ioapic_dynirq_base; +struct mp_pin_info { + int trigger; + int polarity; + int node; + int set; + u32 count; +}; + static struct ioapic { /* * # of IRQ routing registers @@ -102,6 +110,7 @@ static struct
ioapic { struct mp_ioapic_gsi gsi_config; struct ioapic_domain_cfg irqdomain_cfg; struct irq_domain *irqdomain; + struct mp_pin_info *pin_info; DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); } ioapics[MAX_IO_APICS]; @@ -147,6 +156,11 @@ static inline int mp_init_irq_at_boot(int ioapic, int irq) return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs()); } +static inline struct mp_pin_info *mp_pin_info(int ioapic_idx, int pin) +{ + return ioapics[ioapic_idx].pin_info + pin; +} + static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic) { return ioapics[ioapic].irqdomain; @@ -1006,6 +1020,7 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, { int irq; struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); + struct mp_pin_info *info = mp_pin_info(ioapic, pin); /* * Don't use irqdomain to manage ISA IRQs because there may be @@ -1034,6 +1049,13 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, irq = irq_find_mapping(domain, pin); if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC)) irq = alloc_irq_from_domain(domain, gsi, pin); + + if (flags & IOAPIC_MAP_ALLOC) { + if (irq > 0) + info->count++; + else if (info->count == 0) + info->set = 0; + } mutex_unlock(&ioapic_mutex); return irq > 0 ? irq : -1; @@ -2923,18 +2945,27 @@ out: static int mp_irqdomain_create(int ioapic) { + size_t size; int hwirqs = mp_ioapic_pin_count(ioapic); struct ioapic *ip = &ioapics[ioapic]; struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); + size = sizeof(struct mp_pin_info) * mp_ioapic_pin_count(ioapic); + ip->pin_info = kzalloc(size, GFP_KERNEL); + if (!ip->pin_info) + return -ENOMEM; + if (cfg->type == IOAPIC_DOMAIN_INVALID) return 0; ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops, (void *)(long)ioapic); - if(!ip->irqdomain) + if(!ip->irqdomain) { + kfree(ip->pin_info); + ip->pin_info = NULL; return -ENOMEM; + } if (cfg->type == IOAPIC_DOMAIN_LEGACY || cfg->type == IOAPIC_DOMAIN_STRICT) @@ -3902,6 +3933,72 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, nr_ioapics++; } +int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) +{ + int ioapic = (int)(long)domain->host_data; + struct mp_pin_info *info = mp_pin_info(ioapic, hwirq); + struct io_apic_irq_attr attr; + + /* + * Skip the timer IRQ if there's a quirk handler installed and if it + * returns 1: + */ + if (apic->multi_timer_check && + apic->multi_timer_check(ioapic, virq)) + return 0; + + /* Get default attribute if not set by caller yet */ + if (!info->set) { + u32 gsi = mp_pin_to_gsi(ioapic, hwirq); + + if (acpi_get_override_irq(gsi, &info->trigger, + &info->polarity) < 0) { + /* + * PCI interrupts are always polarity one level + * triggered. + */ + info->trigger = 1; + info->polarity = 1; + } + info->node = NUMA_NO_NODE; + info->set = 1; + } + set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger, + info->polarity); + + return io_apic_setup_irq_pin(virq, info->node, &attr); +} + +int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) +{ + int ret = 0; + int ioapic, pin; + struct mp_pin_info *info; + + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return -ENODEV; + + pin = mp_find_ioapic_pin(ioapic, gsi); + info = mp_pin_info(ioapic, pin); + trigger = trigger ? 1 : 0; + polarity = polarity ? 
1 : 0; + + mutex_lock(&ioapic_mutex); + if (!info->set) { + info->trigger = trigger; + info->polarity = polarity; + info->node = node; + info->set = 1; + } else if (info->trigger != trigger || info->polarity != polarity) { + ret = -EBUSY; + } + mutex_unlock(&ioapic_mutex); + + return ret; +} + /* Enable IOAPIC early just for system timer */ void __init pre_init_apic_IRQ0(void) { -- cgit v1.2.2 From d7b830013f59cf586c1cec3caa1ce7156da59a13 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:19:59 +0800 Subject: x86, irq, ACPI: Use common irqdomain map interface to program IOAPIC pins Refine ACPI to use common irqdomain map interface to program IOAPIC pins, so we can unify the callsite to progam IOAPIC pins. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Link: http://lkml.kernel.org/r/1402302011-23642-31-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8d9aee1e67e8..9add76f15cb0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -405,11 +405,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) { - int irq; - int ioapic; - int ioapic_pin; - struct io_apic_irq_attr irq_attr; - int ret; + int irq, node; if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; @@ -418,39 +414,27 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, if (acpi_gbl_FADT.sci_interrupt == gsi) return gsi; + trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1; + polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1; + node = dev ? dev_to_node(dev) : NUMA_NO_NODE; + if (mp_set_gsi_attr(gsi, trigger, polarity, node)) { + pr_warn("Failed to set pin attr for GSI%d\n", gsi); + return -1; + } + irq = map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC); if (irq < 0) return irq; - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) { - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); - return gsi; - } - - ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); - - if (ioapic_pin > MP_MAX_IOAPIC_PIN) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mpc_ioapic_id(ioapic), - ioapic_pin); - return gsi; - } - if (enable_update_mptable) mp_config_acpi_gsi(dev, gsi, trigger, polarity); - set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, - trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 
0 : 1); - ret = io_apic_set_pci_routing(dev, irq, &irq_attr); - if (ret < 0) - irq = -1; - return irq; } -static struct irq_domain_ops acpi_irqdomain_ops; +static struct irq_domain_ops acpi_irqdomain_ops = { + .map = mp_irqdomain_map, +}; static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) @@ -622,10 +606,6 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) int irq = map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); if (irq >= 0) { -#ifdef CONFIG_X86_IO_APIC - if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) - setup_IO_APIC_irq_extra(gsi); -#endif *irqp = irq; return 0; } -- cgit v1.2.2 From 9506063992cc0785246fd314a4a40b6314685aa8 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:20:00 +0800 Subject: x86, irq, mpparse: Use common irqdomain map interface to program IOAPIC pins Refine mpparse to use common irqdomain map interface to program IOAPIC pins, so we can unify the callsite to progam IOAPIC pins. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-32-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mpparse.c | 4 +++- arch/x86/pci/irq.c | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 13c8e1f864fc..faf503aa3b70 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -113,7 +113,9 @@ static void __init MP_bus_info(struct mpc_bus *m) pr_warn("Unknown bustype %s - ignoring\n", str); } -static struct irq_domain_ops mp_ioapic_irqdomain_ops; +static struct irq_domain_ops mp_ioapic_irqdomain_ops = { + .map = mp_irqdomain_map, +}; static void __init MP_ioapic_info(struct mpc_ioapic *m) { diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 84112f55dd7a..e4200e5e775e 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1227,8 +1227,6 @@ static int pirq_enable_irq(struct pci_dev *dev) } dev = temp_dev; if (irq >= 0) { - io_apic_set_pci_routing(&dev->dev, irq, - &irq_attr); dev->irq = irq; dev_info(&dev->dev, "PCI->APIC IRQ transform: " "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); -- cgit v1.2.2 From ecc527d560cd87c74cc0bc7aff36eb72f7e18615 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:20:01 +0800 Subject: x86, irq, SFI: Use common irqdomain map interface to program IOAPIC pins Refine SFI to use common irqdomain map interface to program IOAPIC pins, so we can unify the callsite to progam IOAPIC pins. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: sfi-devel@simplefirmware.org Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: David Cohen Cc: Kuppuswamy Sathyanarayanan Link: http://lkml.kernel.org/r/1402302011-23642-33-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/intel_mid_pci.c | 19 +++++-------- arch/x86/platform/intel-mid/sfi.c | 58 +++++++++++++++++---------------------- arch/x86/platform/sfi/sfi.c | 4 ++- 3 files changed, 35 insertions(+), 46 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index fcbdc5fac2c6..337d165c64f1 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -208,27 +208,22 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, static int intel_mid_pci_irq_enable(struct pci_dev *dev) { - u8 pin; - struct io_apic_irq_attr irq_attr; + int polarity; - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) + polarity = 0; /* active high */ + else + polarity = 1; /* active low */ /* * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to * IOAPIC RTE entries, so we just enable RTE for the device. */ + if (mp_set_gsi_attr(dev->irq, 1, polarity, dev_to_node(&dev->dev))) + return -EBUSY; if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0) return -EBUSY; - irq_attr.ioapic = mp_find_ioapic(dev->irq); - irq_attr.ioapic_pin = dev->irq; - irq_attr.trigger = 1; /* level */ - if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) - irq_attr.polarity = 0; /* active high */ - else - irq_attr.polarity = 1; /* active low */ - io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr); - return 0; } diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 7161395e7de7..3c53a90fdb18 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -432,9 +432,8 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) struct sfi_table_simple *sb; struct sfi_device_table_entry *pentry; struct devs_id *dev = NULL; - int num, i; - int ioapic; - struct io_apic_irq_attr irq_attr; + int num, i, ret; + int polarity; sb = (struct sfi_table_simple *)table; num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry); @@ -448,37 +447,30 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) * devices, but they have separate RTE entry in IOAPIC * so we have to enable them one by one here */ - ioapic = mp_find_ioapic(irq); - if (ioapic >= 0) { - irq_attr.ioapic = ioapic; - irq_attr.ioapic_pin = irq; - irq_attr.trigger = 1; - if (intel_mid_identify_cpu() == - INTEL_MID_CPU_CHIP_TANGIER) { - if (!strncmp(pentry->name, - "r69001-ts-i2c", 13)) - /* active low */ - irq_attr.polarity = 1; - else if (!strncmp(pentry->name, - "synaptics_3202", 14)) - /* active low */ - irq_attr.polarity = 1; - else if (irq == 41) - /* fast_int_1 */ - irq_attr.polarity = 1; - else - /* active high */ - irq_attr.polarity = 0; - } else { - /* PNW and CLV go with active low */ - irq_attr.polarity = 1; - } - WARN_ON(mp_map_gsi_to_irq(irq, - IOAPIC_MAP_ALLOC) < 0); - io_apic_set_pci_routing(NULL, irq, &irq_attr); + if (intel_mid_identify_cpu() == + INTEL_MID_CPU_CHIP_TANGIER) { + if (!strncmp(pentry->name, "r69001-ts-i2c", 13)) + /* active low */ + polarity = 1; + else if (!strncmp(pentry->name, + "synaptics_3202", 14)) + /* active low */ + polarity = 1; + else if (irq == 41) + /* fast_int_1 */ + polarity = 1; + else + /* active high */ + polarity = 0; + } else { + /* PNW and CLV go 
with active low */ + polarity = 1; } - } else { - irq = 0; /* No irq */ + + ret = mp_set_gsi_attr(irq, 1, polarity, NUMA_NO_NODE); + if (ret == 0) + ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC); + WARN_ON(ret < 0); } dev = get_device_id(pentry->type, pentry->name); diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c index 1fdaa57f41a5..2a8a74f3bd76 100644 --- a/arch/x86/platform/sfi/sfi.c +++ b/arch/x86/platform/sfi/sfi.c @@ -71,7 +71,9 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC -static struct irq_domain_ops sfi_ioapic_irqdomain_ops; +static struct irq_domain_ops sfi_ioapic_irqdomain_ops = { + .map = mp_irqdomain_map, +}; static int __init sfi_parse_ioapic(struct sfi_table_header *table) { -- cgit v1.2.2 From 795aacf63fee1bfe8b68fd5db4882576c5c0c570 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:20:02 +0800 Subject: x86, irq, devicetree: Use common irqdomain map interface to program IOAPIC pins Refine devicetree to use common irqdomain map interface to program IOAPIC pins, so we can unify the callsite to progam IOAPIC pins. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Rob Herring Cc: Michal Simek Cc: Tony Lindgren Link: http://lkml.kernel.org/r/1402302011-23642-34-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/devicetree.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index ee26feca93d9..f33bb436b8f4 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -201,10 +201,8 @@ static int ioapic_xlate(struct irq_domain *domain, const u32 *intspec, u32 intsize, irq_hw_number_t *out_hwirq, u32 *out_type) { - struct io_apic_irq_attr attr; struct of_ioapic_type *it; - u32 line, idx; - int rc; + u32 line, idx, gsi; if (WARN_ON(intsize < 2)) return -EINVAL; @@ -217,12 +215,9 @@ static int ioapic_xlate(struct irq_domain *domain, it = &of_ioapic_type[intspec[1]]; idx = (u32)(long)domain->host_data; - set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); - - rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line), - cpu_to_node(0), &attr); - if (rc) - return rc; + gsi = mp_pin_to_gsi(idx, line); + if (mp_set_gsi_attr(gsi, it->trigger, it->polarity, cpu_to_node(0))) + return -EBUSY; *out_hwirq = line; *out_type = it->out_type; @@ -230,6 +225,7 @@ static int ioapic_xlate(struct irq_domain *domain, } const struct irq_domain_ops ioapic_irq_domain_ops = { + .map = mp_irqdomain_map, .xlate = ioapic_xlate, }; -- cgit v1.2.2 From 9f354b0252b81c870ac7534d71906280cb573f86 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:20:03 +0800 Subject: x86, irq: Clean up unused IOAPIC interface Now we have converted all x86 platforms to use the common irqdomain map interface. There's no caller of io_apic_set_pci_routing(), setup_IO_APIC_irq_extra() and io_apic_setup_irq_pin_once() any more, so kill them. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-35-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 9 ------ arch/x86/kernel/apic/io_apic.c | 70 ------------------------------------------ 2 files changed, 79 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index c53587868590..af6f9d4309bf 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -141,9 +141,6 @@ extern int noioapicreroute; struct io_apic_irq_attr; struct irq_cfg; -extern int io_apic_set_pci_routing(struct device *dev, int irq, - struct io_apic_irq_attr *irq_attr); -extern void setup_IO_APIC_irq_extra(u32 gsi); extern void ioapic_insert_resources(void); extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, @@ -155,8 +152,6 @@ extern void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id); extern void native_eoi_ioapic_pin(int apic, int pin, int vector); -extern int io_apic_setup_irq_pin_once(unsigned int irq, int node, - struct io_apic_irq_attr *attr); extern int save_ioapic_entries(void); extern void mask_ioapic_entries(void); @@ -240,10 +235,6 @@ static inline int mp_find_ioapic(u32 gsi) { return 0; } static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; } -struct io_apic_irq_attr; -static inline int io_apic_set_pci_routing(struct device *dev, int irq, - struct io_apic_irq_attr *irq_attr) { return 0; } - static inline int save_ioapic_entries(void) { return -ENOMEM; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a602b35bcfe7..6e3d4c771832 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -111,7 +111,6 @@ static struct ioapic { struct ioapic_domain_cfg irqdomain_cfg; struct irq_domain *irqdomain; struct mp_pin_info *pin_info; - DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); } ioapics[MAX_IO_APICS]; #define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver @@ -1523,38 +1522,6 @@ static void __init setup_IO_APIC_irqs(void) __io_apic_setup_irqs(ioapic_idx); } -/* - * for the gsi that is not in first ioapic - * but could not use acpi_register_gsi() - * like some special sci in IBM x3330 - */ -void setup_IO_APIC_irq_extra(u32 gsi) -{ - int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0); - struct io_apic_irq_attr attr; - - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic_idx = mp_find_ioapic(gsi); - if (ioapic_idx < 0) - return; - - pin = mp_find_ioapic_pin(ioapic_idx, gsi); - idx = find_irq_entry(ioapic_idx, pin, mp_INT); - if (idx == -1) - return; - - irq = pin_2_irq(idx, ioapic_idx, pin, IOAPIC_MAP_ALLOC); - if (irq < 0 || mp_init_irq_at_boot(ioapic_idx, irq)) - return; - - set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), - irq_polarity(idx)); - - io_apic_setup_irq_pin_once(irq, node, &attr); -} - /* * Set up the timer pin, possibly with the 8259A-master behind. 
*/ @@ -3458,27 +3425,6 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) return ret; } -int io_apic_setup_irq_pin_once(unsigned int irq, int node, - struct io_apic_irq_attr *attr) -{ - unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin; - int ret; - struct IO_APIC_route_entry orig_entry; - - /* Avoid redundant programming */ - if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) { - pr_debug("Pin %d-%d already programmed\n", mpc_ioapic_id(ioapic_idx), pin); - orig_entry = ioapic_read_entry(attr->ioapic, pin); - if (attr->trigger == orig_entry.trigger && attr->polarity == orig_entry.polarity) - return 0; - return -EBUSY; - } - ret = io_apic_setup_irq_pin(irq, node, attr); - if (!ret) - set_bit(pin, ioapics[ioapic_idx].pin_programmed); - return ret; -} - static int __init io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; @@ -3525,22 +3471,6 @@ int __init arch_probe_nr_irqs(void) return 0; } -int io_apic_set_pci_routing(struct device *dev, int irq, - struct io_apic_irq_attr *irq_attr) -{ - int node; - - if (!IO_APIC_IRQ(irq)) { - apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - irq_attr->ioapic); - return -EINVAL; - } - - node = dev ? dev_to_node(dev) : cpu_to_node(0); - - return io_apic_setup_irq_pin_once(irq, node, irq_attr); -} - #ifdef CONFIG_X86_32 static int __init io_apic_get_unique_id(int ioapic, int apic_id) { -- cgit v1.2.2 From 16ee7b3dcc56be14b9a813612cff2cc2339cdced Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:20:04 +0800 Subject: x86, irq: Simplify the way to handle ISA IRQ On startup, setup_IO_APIC_irqs() will program all IOAPIC pins for ISA IRQs. Later when mp_map_pin_to_irq() is called, it just returns ISA IRQ number without programming corresponding IOAPIC pin. This patch consolidates the way to program IOAPIC pins for both ISA and non-ISA IRQs into mp_map_pin_to_irq() as below: 1) For ISA IRQs, mp_irqs array is used to map IOAPIC pin to IRQ and mp_irqdomain_map() is used to actually program the pin. 2) For non-ISA IRQs, irqdomain is used to map IOAPIC pin to IRQ, and mp_irqdomain_map() is also used to actually program the pin. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Link: http://lkml.kernel.org/r/1402302011-23642-36-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 15 +------ arch/x86/kernel/apic/io_apic.c | 98 ++++++++++++++++-------------------------- 2 files changed, 40 insertions(+), 73 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 9add76f15cb0..fd4b6d2e436c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -101,17 +101,6 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { #define ACPI_INVALID_GSI INT_MIN -static int map_gsi_to_irq(unsigned int gsi, unsigned int flags) -{ - int i; - - for (i = 0; i < nr_legacy_irqs(); i++) - if (isa_irq_to_gsi[i] == gsi) - return i; - - return mp_map_gsi_to_irq(gsi, flags); -} - /* * This is just a simple wrapper around early_ioremap(), * with sanity checks for phys == 0 and size == 0. 
@@ -422,7 +411,7 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, return -1; } - irq = map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC); + irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC); if (irq < 0) return irq; @@ -603,7 +592,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) { - int irq = map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); + int irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); if (irq >= 0) { *irqp = irq; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 6e3d4c771832..8485d904b653 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -204,8 +204,6 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -static int io_apic_setup_irq_pin(unsigned int irq, int node, - struct io_apic_irq_attr *attr); static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ @@ -1021,6 +1019,16 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); struct mp_pin_info *info = mp_pin_info(ioapic, pin); + if (!domain) { + /* + * Provide an identity mapping of gsi == irq except on truly + * weird platforms that have non isa irqs in the first 16 gsis. + */ + return gsi >= nr_legacy_irqs() ? gsi : gsi_top + gsi; + } + + mutex_lock(&ioapic_mutex); + /* * Don't use irqdomain to manage ISA IRQs because there may be * multiple IOAPIC pins sharing the same ISA IRQ number and @@ -1033,28 +1041,30 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, * the interrupt routing logic. Thus there may be multiple pins * sharing the same legacy IRQ number when ACPI is disabled. */ - if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) - return mp_irqs[idx].srcbusirq; - - if (!domain) { - /* - * Provide an identity mapping of gsi == irq except on truly - * weird platforms that have non isa irqs in the first 16 gsis. - */ - return gsi >= nr_legacy_irqs() ? gsi : gsi_top + gsi; + if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { + irq = mp_irqs[idx].srcbusirq; + if (flags & IOAPIC_MAP_ALLOC) { + if (info->count == 0 && + mp_irqdomain_map(domain, irq, pin) != 0) + irq = -1; + + /* special handling for timer IRQ0 */ + if (irq == 0) + info->count++; + } + } else { + irq = irq_find_mapping(domain, pin); + if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC)) + irq = alloc_irq_from_domain(domain, gsi, pin); } - mutex_lock(&ioapic_mutex); - irq = irq_find_mapping(domain, pin); - if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC)) - irq = alloc_irq_from_domain(domain, gsi, pin); - if (flags & IOAPIC_MAP_ALLOC) { if (irq > 0) info->count++; else if (info->count == 0) info->set = 0; } + mutex_unlock(&ioapic_mutex); return irq > 0 ? 
irq : -1; @@ -1471,55 +1481,23 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry); } -static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin) -{ - if (idx != -1) - return false; - - apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n", - mpc_ioapic_id(ioapic_idx), pin); - return true; -} - -static void __init __io_apic_setup_irqs(unsigned int ioapic_idx) -{ - int idx, node = cpu_to_node(0); - struct io_apic_irq_attr attr; - unsigned int pin, irq; - - for_each_pin(ioapic_idx, pin) { - idx = find_irq_entry(ioapic_idx, pin, mp_INT); - if (io_apic_pin_not_connected(idx, ioapic_idx, pin)) - continue; - - irq = pin_2_irq(idx, ioapic_idx, pin, - ioapic_idx ? 0 : IOAPIC_MAP_ALLOC); - if (irq < 0 || !mp_init_irq_at_boot(ioapic_idx, irq)) - continue; - - /* - * Skip the timer IRQ if there's a quirk handler - * installed and if it returns 1: - */ - if (apic->multi_timer_check && - apic->multi_timer_check(ioapic_idx, irq)) - continue; - - set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), - irq_polarity(idx)); - - io_apic_setup_irq_pin(irq, node, &attr); - } -} - static void __init setup_IO_APIC_irqs(void) { - unsigned int ioapic_idx; + unsigned int ioapic, pin; + int idx; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for_each_ioapic(ioapic_idx) - __io_apic_setup_irqs(ioapic_idx); + for_each_ioapic_pin(ioapic, pin) { + idx = find_irq_entry(ioapic, pin, mp_INT); + if (idx < 0) + apic_printk(APIC_VERBOSE, + KERN_DEBUG " apic %d pin %d not connected\n", + mpc_ioapic_id(ioapic), pin); + else + pin_2_irq(idx, ioapic, pin, + ioapic ? 0 : IOAPIC_MAP_ALLOC); + } } /* -- cgit v1.2.2 From df334bead7e94772c41745af9f329383067d44ae Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:20:06 +0800 Subject: x86, irq: Introduce helper functions to release IOAPIC pin Introduce function mp_unmap_irq() to release IOAPIC IRQ when IRQ is not used any more, which will typically called by pcibios_disabled_irq. And function mp_irqdomain_unmap() is a common implementation of irq_domain_ops.unmap for IOAPIC. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-38-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 3 +++ arch/x86/kernel/apic/io_apic.c | 61 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index af6f9d4309bf..0aeed5ca356e 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -187,10 +187,12 @@ extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); extern u32 mp_pin_to_gsi(int ioapic, int pin); extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags); +extern void mp_unmap_irq(int irq); extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, struct ioapic_domain_cfg *cfg); extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq); +extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq); extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node); extern void __init pre_init_apic_IRQ0(void); @@ -234,6 +236,7 @@ static inline void ioapic_insert_resources(void) { } static inline int mp_find_ioapic(u32 gsi) { return 0; } static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; } +static inline void mp_unmap_irq(int irq) { } static inline int save_ioapic_entries(void) { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8485d904b653..8d80a8f1d670 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -467,6 +467,21 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi return 0; } +static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin) +{ + struct irq_pin_list **last, *entry; + + last = &cfg->irq_2_pin; + for_each_irq_pin(entry, cfg->irq_2_pin) + if (entry->apic == apic && entry->pin == pin) { + *last = entry->next; + kfree(entry); + return; + } else { + last = &entry->next; + } +} + static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { if (__add_pin_to_irq_node(cfg, node, apic, pin)) @@ -1119,6 +1134,31 @@ int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags); } +void mp_unmap_irq(int irq) +{ + struct irq_data *data = irq_get_irq_data(irq); + struct mp_pin_info *info; + int ioapic, pin; + + if (!data || !data->domain) + return; + + ioapic = (int)(long)data->domain->host_data; + pin = (int)data->hwirq; + info = mp_pin_info(ioapic, pin); + + mutex_lock(&ioapic_mutex); + if (--info->count == 0) { + info->set = 0; + if (irq < nr_legacy_irqs() && + ioapics[ioapic].irqdomain_cfg.type == IOAPIC_DOMAIN_LEGACY) + mp_irqdomain_unmap(data->domain, irq); + else + irq_dispose_mapping(irq); + } + mutex_unlock(&ioapic_mutex); +} + /* * Find a specific PCI IRQ entry. 
* Not an __init, possibly needed by modules @@ -3878,6 +3918,27 @@ int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, return io_apic_setup_irq_pin(virq, info->node, &attr); } +void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq) +{ + struct irq_data *data = irq_get_irq_data(virq); + struct irq_cfg *cfg = irq_cfg(virq); + int ioapic = (int)(long)domain->host_data; + int pin = (int)data->hwirq; + + /* + * Skip the timer IRQ if there's a quirk handler installed and if it + * returns 1: + */ + if (apic->multi_timer_check && + apic->multi_timer_check(ioapic, virq)) + return; + + ioapic_mask_entry(ioapic, pin); + __remove_pin_from_irq(cfg, ioapic, pin); + WARN_ON(cfg->irq_2_pin != NULL); + arch_teardown_hwirq(virq); +} + int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) { int ret = 0; -- cgit v1.2.2 From 6a38fa0e3c94dfd1394a71a2d47c9c4d47367374 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 10 Jun 2014 14:16:27 +0800 Subject: x86, irq, ACPI: Release IOAPIC pin when PCI device is disabled Release IOAPIC pin associated with PCI device when the PCI device is disabled. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Len Brown Cc: Pavel Machek Link: http://lkml.kernel.org/r/1402380987-32577-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fd4b6d2e436c..8c28023924bf 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -421,8 +421,24 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, return irq; } +static void mp_unregister_gsi(u32 gsi) +{ + int irq; + + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) + return; + + if (acpi_gbl_FADT.sci_interrupt == gsi) + return; + + irq = mp_map_gsi_to_irq(gsi, 0); + if (irq > 0) + mp_unmap_irq(irq); +} + static struct irq_domain_ops acpi_irqdomain_ops = { .map = mp_irqdomain_map, + .unmap = mp_irqdomain_unmap, }; static int __init @@ -640,8 +656,16 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, return irq; } +static void acpi_unregister_gsi_ioapic(u32 gsi) +{ +#ifdef CONFIG_X86_IO_APIC + mp_unregister_gsi(gsi); +#endif +} + int (*__acpi_register_gsi)(struct device *dev, u32 gsi, int trigger, int polarity) = acpi_register_gsi_pic; +void (*__acpi_unregister_gsi)(u32 gsi) = NULL; #ifdef CONFIG_ACPI_SLEEP int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel; @@ -661,6 +685,8 @@ EXPORT_SYMBOL_GPL(acpi_register_gsi); void acpi_unregister_gsi(u32 gsi) { + if (__acpi_unregister_gsi) + __acpi_unregister_gsi(gsi); } EXPORT_SYMBOL_GPL(acpi_unregister_gsi); @@ -668,6 +694,7 @@ static void __init acpi_set_irq_model_ioapic(void) { acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; __acpi_register_gsi = acpi_register_gsi_ioapic; + __acpi_unregister_gsi = acpi_unregister_gsi_ioapic; acpi_ioapic = 1; } -- cgit v1.2.2 From c03b3b0738a56cf283b0d05256988d5e3c8bd719 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:20:08 +0800 Subject: x86, irq, mpparse: Release IOAPIC pin when PCI device is disabled Release IOAPIC pin associated with PCI device when the PCI device is disabled. 
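Taken together with the preceding patches, the intended IOAPIC pin lifecycle for a bus or platform driver is roughly the sketch below. This is an illustrative helper, not a hunk from this series; the trigger/polarity values are arbitrary example choices, and only interfaces introduced earlier in this log (mp_set_gsi_attr(), mp_map_gsi_to_irq(), mp_unmap_irq()) are used.

static int example_enable_gsi(u32 gsi, int node)
{
	/* level triggered (1), active low (1); attributes must be set before mapping */
	if (mp_set_gsi_attr(gsi, 1, 1, node))
		return -EBUSY;

	/* allocates the IRQ and programs the pin via mp_irqdomain_map() */
	return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
}

static void example_disable_gsi(u32 gsi)
{
	int irq = mp_map_gsi_to_irq(gsi, 0);	/* look up only, don't allocate */

	if (irq > 0)
		mp_unmap_irq(irq);		/* releases the pin once its refcount drops to zero */
}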
Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-40-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mpparse.c | 1 + arch/x86/pci/irq.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index faf503aa3b70..fde86d2b79f8 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -115,6 +115,7 @@ static void __init MP_bus_info(struct mpc_bus *m) static struct irq_domain_ops mp_ioapic_irqdomain_ops = { .map = mp_irqdomain_map, + .unmap = mp_irqdomain_unmap, }; static void __init MP_ioapic_info(struct mpc_ioapic *m) diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index e4200e5e775e..748cfe8ab322 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -26,6 +26,7 @@ static int acer_tm360_irqrouting; static struct irq_routing_table *pirq_table; static int pirq_enable_irq(struct pci_dev *dev); +static void pirq_disable_irq(struct pci_dev *dev); /* * Never use: 0, 1, 2 (timer, keyboard, and cascade) @@ -53,7 +54,7 @@ struct irq_router_handler { }; int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq; -void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; +void (*pcibios_disable_irq)(struct pci_dev *dev) = pirq_disable_irq; /* * Check passed address for the PCI IRQ Routing Table signature @@ -1186,7 +1187,7 @@ void pcibios_penalize_isa_irq(int irq, int active) static int pirq_enable_irq(struct pci_dev *dev) { - u8 pin; + u8 pin = 0; pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (pin && !pcibios_lookup_irq(dev, 1)) { @@ -1252,3 +1253,11 @@ static int pirq_enable_irq(struct pci_dev *dev) } return 0; } + +static void pirq_disable_irq(struct pci_dev *dev) +{ + if (io_apic_assign_pci_irqs && dev->irq) { + mp_unmap_irq(dev->irq); + dev->irq = 0; + } +} -- cgit v1.2.2 From 8a3e533df17f821a4d25dd2969d2f90d7c168b46 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:20:09 +0800 Subject: x86, irq, SFI: Release IOAPIC pin when PCI device is disabled Release IOAPIC pin associated with PCI device when the PCI device is disabled. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-41-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/intel_mid_pci.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 337d165c64f1..09fece368592 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -227,6 +227,12 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) return 0; } +static void intel_mid_pci_irq_disable(struct pci_dev *dev) +{ + if (dev->irq > 0) + mp_unmap_irq(dev->irq); +} + struct pci_ops intel_mid_pci_ops = { .read = pci_read, .write = pci_write, @@ -243,6 +249,7 @@ int __init intel_mid_pci_init(void) pr_info("Intel MID platform detected, using MID PCI ops\n"); pci_mmcfg_late_init(); pcibios_enable_irq = intel_mid_pci_irq_enable; + pcibios_disable_irq = intel_mid_pci_irq_disable; pci_root_ops = intel_mid_pci_ops; pci_soc_mode = 1; /* Continue with standard init */ -- cgit v1.2.2 From 00f49c29b3298806c74589a4ed016a2afb359e98 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:20:10 +0800 Subject: x86, irq, devicetree: Release IOAPIC pin when PCI device is disabled Release IOAPIC pin associated with PCI device when the PCI device is disabled. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Rob Herring Cc: Michal Simek Cc: Tony Lindgren Link: http://lkml.kernel.org/r/1402302011-23642-42-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/devicetree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index f33bb436b8f4..3d3503351242 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -226,6 +226,7 @@ static int ioapic_xlate(struct irq_domain *domain, const struct irq_domain_ops ioapic_irq_domain_ops = { .map = mp_irqdomain_map, + .unmap = mp_irqdomain_unmap, .xlate = ioapic_xlate, }; -- cgit v1.2.2 From b81975eade8c6495f3c4d6746d22bdc95f617777 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:20:11 +0800 Subject: x86, irq: Clean up irqdomain transition code Now we have completely switched to irqdomain, so clean up transition code in IOAPIC drivers. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-43-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8d80a8f1d670..a44dce8cc559 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -86,6 +86,7 @@ static DEFINE_RAW_SPINLOCK(ioapic_lock); static DEFINE_RAW_SPINLOCK(vector_lock); static DEFINE_MUTEX(ioapic_mutex); static unsigned int ioapic_dynirq_base; +static int ioapic_initialized; struct mp_pin_info { int trigger; @@ -1034,13 +1035,8 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); struct mp_pin_info *info = mp_pin_info(ioapic, pin); - if (!domain) { - /* - * Provide an identity mapping of gsi == irq except on truly - * weird platforms that have non isa irqs in the first 16 gsis. - */ - return gsi >= nr_legacy_irqs() ? gsi : gsi_top + gsi; - } + if (!domain) + return -1; mutex_lock(&ioapic_mutex); @@ -2986,6 +2982,8 @@ void __init setup_IO_APIC(void) init_IO_APIC_traps(); if (nr_legacy_irqs()) check_timer(); + + ioapic_initialized = 1; } /* @@ -3461,12 +3459,11 @@ static int __init io_apic_get_redir_entries(int ioapic) unsigned int arch_dynirq_lower_bound(unsigned int from) { - unsigned int min = gsi_top + nr_legacy_irqs(); - - if (ioapic_dynirq_base) - return ioapic_dynirq_base; - - return from < min ? min : from; + /* + * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use + * gsi_top if ioapic_dynirq_base hasn't been initialized yet. + */ + return ioapic_initialized ? ioapic_dynirq_base : gsi_top; } int __init arch_probe_nr_irqs(void) @@ -3841,10 +3838,7 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, ioapics[idx].mp_config.flags = MPC_APIC_USABLE; ioapics[idx].mp_config.apicaddr = address; ioapics[idx].irqdomain = NULL; - if (cfg) - ioapics[idx].irqdomain_cfg = *cfg; - else - ioapics[idx].irqdomain_cfg.type = IOAPIC_DOMAIN_INVALID; + ioapics[idx].irqdomain_cfg = *cfg; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); -- cgit v1.2.2 From 61b165caa686b8334379293d0e241f740fac195a Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 13 Jun 2014 18:03:56 +0530 Subject: ASoC: Intel: add mrfld pipelines Merrifield DSP used various pipelines to identify the streams and processing modules. 
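As a rough illustration of the device-to-firmware mapping this patch adds further down, a single table entry might look like the following. The values are hypothetical; the struct, pipe IDs and task IDs are the ones defined in the header introduced by this patch, and SNDRV_PCM_STREAM_PLAYBACK comes from the standard ALSA headers.

/* Illustrative only: PCM device 0, substream 0, playback, routed to the
 * firmware's PIPE_MEDIA1_OUT pipe and handled by the media task. */
static struct sst_dev_stream_map example_strm_map[] = {
	{ 0, 0, SNDRV_PCM_STREAM_PLAYBACK, PIPE_MEDIA1_OUT, SST_TASK_ID_MEDIA, 0 },
};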
Add these defination in the pcm driver and also add a table for device entries to firmware pipeline id conversion Signed-off-by: Vinod Koul Signed-off-by: Mark Brown --- arch/x86/include/asm/platform_sst_audio.h | 78 +++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 arch/x86/include/asm/platform_sst_audio.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h new file mode 100644 index 000000000000..0a4e140315b6 --- /dev/null +++ b/arch/x86/include/asm/platform_sst_audio.h @@ -0,0 +1,78 @@ +/* + * platform_sst_audio.h: sst audio platform data header file + * + * Copyright (C) 2012-14 Intel Corporation + * Author: Jeeja KP + * Omair Mohammed Abdullah + * Vinod Koul ,vinod.koul@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#ifndef _PLATFORM_SST_AUDIO_H_ +#define _PLATFORM_SST_AUDIO_H_ + +#include + +enum sst_audio_task_id_mrfld { + SST_TASK_ID_NONE = 0, + SST_TASK_ID_SBA = 1, + SST_TASK_ID_MEDIA = 3, + SST_TASK_ID_MAX = SST_TASK_ID_MEDIA, +}; + +/* Device IDs for Merrifield are Pipe IDs, + * ref: DSP spec v0.75 */ +enum sst_audio_device_id_mrfld { + /* Output pipeline IDs */ + PIPE_ID_OUT_START = 0x0, + PIPE_CODEC_OUT0 = 0x2, + PIPE_CODEC_OUT1 = 0x3, + PIPE_SPROT_LOOP_OUT = 0x4, + PIPE_MEDIA_LOOP1_OUT = 0x5, + PIPE_MEDIA_LOOP2_OUT = 0x6, + PIPE_VOIP_OUT = 0xC, + PIPE_PCM0_OUT = 0xD, + PIPE_PCM1_OUT = 0xE, + PIPE_PCM2_OUT = 0xF, + PIPE_MEDIA0_OUT = 0x12, + PIPE_MEDIA1_OUT = 0x13, +/* Input Pipeline IDs */ + PIPE_ID_IN_START = 0x80, + PIPE_CODEC_IN0 = 0x82, + PIPE_CODEC_IN1 = 0x83, + PIPE_SPROT_LOOP_IN = 0x84, + PIPE_MEDIA_LOOP1_IN = 0x85, + PIPE_MEDIA_LOOP2_IN = 0x86, + PIPE_VOIP_IN = 0x8C, + PIPE_PCM0_IN = 0x8D, + PIPE_PCM1_IN = 0x8E, + PIPE_MEDIA0_IN = 0x8F, + PIPE_MEDIA1_IN = 0x90, + PIPE_MEDIA2_IN = 0x91, + PIPE_RSVD = 0xFF, +}; + +/* The stream map for each platform consists of an array of the below + * stream map structure. + */ +struct sst_dev_stream_map { + u8 dev_num; /* device id */ + u8 subdev_num; /* substream */ + u8 direction; + u8 device_id; /* fw id */ + u8 task_id; /* fw task */ + u8 status; +}; + +struct sst_platform_data { + /* Intel software platform id*/ + struct sst_dev_stream_map *pdev_strm_map; + unsigned int strm_map_size; +}; + +int add_sst_platform_device(void); +#endif + -- cgit v1.2.2 From 411cf9ee2946492c0ac7eca48422fcf94a723ce5 Mon Sep 17 00:00:00 2001 From: Oren Twaig Date: Sun, 29 Jun 2014 13:01:08 +0300 Subject: x86, vsmp: Remove is_vsmp_box() from apic_is_clustered_box() When a vSMP Foundation box is detected, the function apic_cluster_num() counts the number of APIC clusters found. If more than one found, a multi board configuration is assumed, and TSC marked as unstable. This behavior is incorrect as vSMP Foundation may use processors from single node only, attached to memory of other nodes - and such node may have more than one APIC cluster (typically any recent intel box has more than single APIC_CLUSTERID(x)). To fix this, we simply remove the code which detects a vSMP Foundation box and affects apic_is_clusted_box() return value. This can be done because later the kernel checks by itself if the TSC is stable using the check_tsc_sync_[source|target]() functions and marks TSC as unstable if needed. 
Acked-by: Shai Fultheim Signed-off-by: Oren Twaig Link: http://lkml.kernel.org/r/1404036068-11674-1-git-send-email-oren@scalemp.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/apic.h | 8 ------ arch/x86/kernel/apic/apic.c | 60 +-------------------------------------------- 2 files changed, 1 insertion(+), 67 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 69ed79aa9085..b40ea7e4bae9 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -85,14 +85,6 @@ static inline bool apic_from_smp_config(void) #include #endif -#ifdef CONFIG_X86_64 -extern int is_vsmp_box(void); -#else -static inline int is_vsmp_box(void) -{ - return 0; -} -#endif extern int setup_profiling_timer(unsigned int); static inline void native_apic_mem_write(u32 reg, u32 v) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ca1bd75e3de2..6b35d308688c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2451,51 +2451,6 @@ static void apic_pm_activate(void) { } #ifdef CONFIG_X86_64 -static int apic_cluster_num(void) -{ - int i, clusters, zeros; - unsigned id; - u16 *bios_cpu_apicid; - DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - - bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - - for (i = 0; i < nr_cpu_ids; i++) { - /* are we being called early in kernel startup? */ - if (bios_cpu_apicid) { - id = bios_cpu_apicid[i]; - } else if (i < nr_cpu_ids) { - if (cpu_present(i)) - id = per_cpu(x86_bios_cpu_apicid, i); - else - continue; - } else - break; - - if (id != BAD_APICID) - __set_bit(APIC_CLUSTERID(id), clustermap); - } - - /* Problem: Partially populated chassis may not have CPUs in some of - * the APIC clusters they have been allocated. Only present CPUs have - * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. - * Since clusters are allocated sequentially, count zeros only if - * they are bounded by ones. - */ - clusters = 0; - zeros = 0; - for (i = 0; i < NUM_APIC_CLUSTERS; i++) { - if (test_bit(i, clustermap)) { - clusters += 1 + zeros; - zeros = 0; - } else - ++zeros; - } - - return clusters; -} - static int multi_checked; static int multi; @@ -2540,20 +2495,7 @@ static void dmi_check_multi(void) int apic_is_clustered_box(void) { dmi_check_multi(); - if (multi) - return 1; - - if (!is_vsmp_box()) - return 0; - - /* - * ScaleMP vSMPowered boxes have one cluster per board and TSCs are - * not guaranteed to be synced between boards - */ - if (apic_cluster_num() > 1) - return 1; - - return 0; + return multi; } #endif -- cgit v1.2.2 From 8d693b911bb9c57009c24cb1772d205b84c7985c Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 11 Jul 2014 11:51:35 -0400 Subject: xen: Introduce 'xen_nopv' to disable PV extensions for HVM guests. By default when CONFIG_XEN and CONFIG_XEN_PVHVM kernels are run, they will enable the PV extensions (drivers, interrupts, timers, etc) - which is the best option for the majority of use cases. However, in some cases (kexec not fully working, benchmarking) we want to disable Xen PV extensions. As such introduce the 'xen_nopv' parameter that will do it. This parameter is intended only for HVM guests as the Xen PV guests MUST boot with PV extensions. However, even if you use 'xen_nopv' on Xen PV guests it will be ignored. Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: David Vrabel --- [v2: s/off/xen_nopv/ per Boris Ostrovsky recommendation.] 
[v3: Add Reviewed-by] [v4: Clarify that this is only for HVM guests] --- arch/x86/xen/enlighten.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ffb101e45731..eb822391b84c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1826,8 +1826,19 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_mmu_ops(); } +static bool xen_nopv = false; +static __init int xen_parse_nopv(char *arg) +{ + xen_nopv = true; + return 0; +} +early_param("xen_nopv", xen_parse_nopv); + static uint32_t __init xen_hvm_platform(void) { + if (xen_nopv) + return 0; + if (xen_pv_domain()) return 0; @@ -1836,6 +1847,8 @@ static uint32_t __init xen_hvm_platform(void) bool xen_hvm_need_lapic(void) { + if (xen_nopv) + return false; if (xen_pv_domain()) return false; if (!xen_hvm_domain()) -- cgit v1.2.2 From a4355e6749113d424cd15852c73e22c1ef1bb004 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 23 Jun 2014 10:30:35 +0800 Subject: x86: intel-mid: Use the new io_apic interfaces Commit 9f354b0252b8 "x86, irq: Clean up unused IOAPIC interface" kills interface io_apic_set_pci_routing(), so change arch/x86/platform/ intel-mid/device_libs/platform_wdt.c to use new interfaces. Due to hardware resource restriction, this patch only passes compilation without functional tests. Signed-off-by: Jiang Liu Acked-by: David Cohen Cc: Tony Luck Cc: Tang Feng Cc: Yinghai Lu Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Tony Luck Link: http://lkml.kernel.org/r/1403490643-26187-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- .../platform/intel-mid/device_libs/platform_wdt.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c index 973cf3bfa9fd..0b283d4d0ad7 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c @@ -26,28 +26,18 @@ static struct platform_device wdt_dev = { static int tangier_probe(struct platform_device *pdev) { - int ioapic; - int irq; + int gsi; struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data; - struct io_apic_irq_attr irq_attr = { 0 }; if (!pdata) return -EINVAL; - irq = pdata->irq; - ioapic = mp_find_ioapic(irq); - if (ioapic >= 0) { - int ret; - irq_attr.ioapic = ioapic; - irq_attr.ioapic_pin = irq; - irq_attr.trigger = 1; - /* irq_attr.polarity = 0; -> Active high */ - ret = io_apic_set_pci_routing(NULL, irq, &irq_attr); - if (ret) - return ret; - } else { + /* IOAPIC builds identity mapping between GSI and IRQ on MID */ + gsi = pdata->irq; + if (mp_set_gsi_attr(gsi, 1, 0, cpu_to_node(0)) || + mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC) <= 0) { dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n", - irq); + gsi); return -EINVAL; } -- cgit v1.2.2 From 162e371712768248a38646eefa71af38b6e0f8ce Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 11 Jul 2014 16:42:34 +0100 Subject: x86/xen: safely map and unmap grant frames when in atomic context arch_gnttab_map_frames() and arch_gnttab_unmap_frames() are called in atomic context but were calling alloc_vm_area() which might sleep. Also, if a driver attempts to allocate a grant ref from an interrupt and the table needs expanding, then the CPU may already by in lazy MMU mode and apply_to_page_range() will BUG when it tries to re-enable lazy MMU mode. 
These two functions are only used in PV guests. Introduce arch_gnttab_init() to allocates the virtual address space in advance. Avoid the use of apply_to_page_range() by using saving and using the array of PTE addresses from the alloc_vm_area() call. N.B. 'alloc_vm_area' pre-allocates the pagetable so there is no need to worry about having to do a PGD/PUD/PMD walk (like apply_to_page_range does) and we can instead do set_pte. Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk ---- [v2: Add comment about alloc_vm_area] [v3: Fix compile error found by 0-day bot] --- arch/x86/xen/grant-table.c | 148 ++++++++++++++++++++++++++++----------------- 1 file changed, 91 insertions(+), 57 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index c98583588580..ebfa9b2c871d 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -36,99 +36,133 @@ #include #include +#include #include #include #include #include +#include #include -static int map_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) +static struct gnttab_vm_area { + struct vm_struct *area; + pte_t **ptes; +} gnttab_shared_vm_area, gnttab_status_vm_area; + +int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + void **__shared) { - unsigned long **frames = (unsigned long **)data; + void *shared = *__shared; + unsigned long addr; + unsigned long i; - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; - return 0; -} + if (shared == NULL) + *__shared = shared = gnttab_shared_vm_area.area->addr; -/* - * This function is used to map shared frames to store grant status. It is - * different from map_pte_fn above, the frames type here is uint64_t. 
- */ -static int map_pte_fn_status(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) -{ - uint64_t **frames = (uint64_t **)data; + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i], + mfn_pte(frames[i], PAGE_KERNEL)); + addr += PAGE_SIZE; + } - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; return 0; } -static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) +int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + grant_status_t **__shared) { + grant_status_t *shared = *__shared; + unsigned long addr; + unsigned long i; + + if (shared == NULL) + *__shared = shared = gnttab_status_vm_area.area->addr; + + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i], + mfn_pte(frames[i], PAGE_KERNEL)); + addr += PAGE_SIZE; + } - set_pte_at(&init_mm, addr, pte, __pte(0)); return 0; } -int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - void **__shared) +void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) { - int rc; - void *shared = *__shared; + pte_t **ptes; + unsigned long addr; + unsigned long i; - if (shared == NULL) { - struct vm_struct *area = - alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); - BUG_ON(area == NULL); - shared = area->addr; - *__shared = shared; - } + if (shared == gnttab_status_vm_area.area->addr) + ptes = gnttab_status_vm_area.ptes; + else + ptes = gnttab_shared_vm_area.ptes; - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn, &frames); - return rc; + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, ptes[i], __pte(0)); + addr += PAGE_SIZE; + } } -int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - grant_status_t **__shared) +static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames) { - int rc; - grant_status_t *shared = *__shared; + area->ptes = kmalloc(sizeof(pte_t *) * nr_frames, GFP_KERNEL); + if (area->ptes == NULL) + return -ENOMEM; - if (shared == NULL) { - /* No need to pass in PTE as we are going to do it - * in apply_to_page_range anyhow. */ - struct vm_struct *area = - alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); - BUG_ON(area == NULL); - shared = area->addr; - *__shared = shared; + area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes); + if (area->area == NULL) { + kfree(area->ptes); + return -ENOMEM; } - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn_status, &frames); - return rc; + return 0; } -void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) +static void arch_gnttab_vfree(struct gnttab_vm_area *area) +{ + free_vm_area(area->area); + kfree(area->ptes); +} + +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status) { - apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); + int ret; + + if (!xen_pv_domain()) + return 0; + + ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared); + if (ret < 0) + return ret; + + /* + * Always allocate the space for the status frames in case + * we're migrated to a host with V2 support. 
+ */ + ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status); + if (ret < 0) + goto err; + + return 0; + err: + arch_gnttab_vfree(&gnttab_shared_vm_area); + return -ENOMEM; } + #ifdef CONFIG_XEN_PVH #include #include -#include #include static int __init xlated_setup_gnttab_pages(void) { -- cgit v1.2.2 From 438b33c7145ca8a5131a30c36d8f59bce119a19a Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 2 Jul 2014 11:25:29 +0100 Subject: xen/grant-table: remove support for V2 tables Since 11c7ff17c9b6dbf3a4e4f36be30ad531a6cf0ec9 (xen/grant-table: Force to use v1 of grants.) the code for V2 grant tables is not used. Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/grant-table.c | 60 ++++------------------------------------------ 1 file changed, 5 insertions(+), 55 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index ebfa9b2c871d..c0413046483a 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -49,7 +49,7 @@ static struct gnttab_vm_area { struct vm_struct *area; pte_t **ptes; -} gnttab_shared_vm_area, gnttab_status_vm_area; +} gnttab_shared_vm_area; int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, unsigned long max_nr_gframes, @@ -73,43 +73,16 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, return 0; } -int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - grant_status_t **__shared) -{ - grant_status_t *shared = *__shared; - unsigned long addr; - unsigned long i; - - if (shared == NULL) - *__shared = shared = gnttab_status_vm_area.area->addr; - - addr = (unsigned long)shared; - - for (i = 0; i < nr_gframes; i++) { - set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i], - mfn_pte(frames[i], PAGE_KERNEL)); - addr += PAGE_SIZE; - } - - return 0; -} - void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) { - pte_t **ptes; unsigned long addr; unsigned long i; - if (shared == gnttab_status_vm_area.area->addr) - ptes = gnttab_status_vm_area.ptes; - else - ptes = gnttab_shared_vm_area.ptes; - addr = (unsigned long)shared; for (i = 0; i < nr_gframes; i++) { - set_pte_at(&init_mm, addr, ptes[i], __pte(0)); + set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i], + __pte(0)); addr += PAGE_SIZE; } } @@ -129,35 +102,12 @@ static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames) return 0; } -static void arch_gnttab_vfree(struct gnttab_vm_area *area) +int arch_gnttab_init(unsigned long nr_shared) { - free_vm_area(area->area); - kfree(area->ptes); -} - -int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status) -{ - int ret; - if (!xen_pv_domain()) return 0; - ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared); - if (ret < 0) - return ret; - - /* - * Always allocate the space for the status frames in case - * we're migrated to a host with V2 support. - */ - ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status); - if (ret < 0) - goto err; - - return 0; - err: - arch_gnttab_vfree(&gnttab_shared_vm_area); - return -ENOMEM; + return arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared); } #ifdef CONFIG_XEN_PVH -- cgit v1.2.2 From 48dc92b9fc3926844257316e75ba11eb5c742b2c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jun 2014 16:08:24 -0700 Subject: seccomp: add "seccomp" syscall This adds the new "seccomp" syscall with both an "operation" and "flags" parameter for future expansion. 
The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). In addition to the TSYNC flag later in this patch series, there is a non-zero chance that this syscall could be used for configuring a fixed argument area for seccomp-tracer-aware processes to pass syscall arguments in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter" for this syscall. Additionally, this syscall uses operation, flags, and user pointer for arguments because strictly passing arguments via a user pointer would mean seccomp itself would be unable to trivially filter the seccomp syscall itself. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351 i386 sched_setattr sys_sched_setattr 352 i386 sched_getattr sys_sched_getattr 353 i386 renameat2 sys_renameat2 +354 i386 seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314 common sched_setattr sys_sched_setattr 315 common sched_getattr sys_sched_getattr 316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact -- cgit v1.2.2 From 8a1664be0b922dd6afd60eca96a992ef5ec22c40 Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Fri, 18 Jul 2014 18:02:52 +0800 Subject: ACPI: add config for BIOS table scan With the addition of ARM64 that does not have a traditional BIOS to scan, add a config option which is selected on x86 (ia64 doesn't need it either, it is EFI/UEFI based system) to do the traditional BIOS scanning for tables. Signed-off-by: Graeme Gregory Signed-off-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d24887b645dc..5617750262db 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -132,6 +132,7 @@ config X86 select GENERIC_CPU_AUTOPROBE select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW + select ACPI_LEGACY_TABLES_LOOKUP if ACPI config INSTRUCTION_DECODER def_bool y -- cgit v1.2.2 From b50154d53e668314542ef9a592accc37137a8f65 Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Fri, 18 Jul 2014 18:02:53 +0800 Subject: ACPI: Don't use acpi_lapic in ACPI core code Now ARM64 support is being added to ACPI so architecture specific values can not be used in core ACPI code. Following on the patch "ACPI / processor: Check if LAPIC is present during initialization" which uses acpi_lapic in acpi_processor.c, on ARM64 platform, GIC is used instead of local APIC, so acpi_lapic is not a suitable value for ARM64. What is actually important at this point is if there is/are CPU entry/entries (Local APIC/SAPIC, GICC) in MADT, so introduce acpi_has_cpu_in_madt() to be arch specific and generic. Signed-off-by: Graeme Gregory Signed-off-by: Hanjun Guo Signed-off-by: Rafael J. 
Wysocki --- arch/x86/include/asm/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index e06225eda635..0ab4f9fd2687 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -121,6 +121,11 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf) buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); } +static inline bool acpi_has_cpu_in_madt(void) +{ + return !!acpi_lapic; +} + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 -- cgit v1.2.2 From 46ba51ea8f8639da32c55744b35479fdfb4e7232 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 18 Jul 2014 18:02:54 +0800 Subject: ACPI / processor: Introduce ARCH_MIGHT_HAVE_ACPI_PDC The use of _PDC is deprecated in ACPI 3.0 in favor of _OSC, as ARM platform is supported only in ACPI 5.0 or higher version, _PDC will not be used in ARM platform, so make Make _PDC only for platforms with Intel CPUs. Introduce ARCH_MIGHT_HAVE_ACPI_PDC and move _PDC related code in ACPI processor driver into a single file processor_pdc.c, make x86 and ia64 select it when ACPI is enabled. This patch also use pr_* to replace printk to fix the checkpatch warning and factor acpi_processor_alloc_pdc() a little bit to avoid duplicate pr_err() code. Suggested-by: Robert Richter Signed-off-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5617750262db..70c43b5371bb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -21,6 +21,7 @@ config X86_64 ### Arch settings config X86 def_bool y + select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO -- cgit v1.2.2 From d334c823b27401721591e0f1220050a41af08165 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 16 Jul 2014 16:58:08 +0800 Subject: ACPICA: Linux: Add support to exclude inclusion. The forthcoming patch will make to be visible to all kernel source code. Thus for the architectures that do not support ACPI and haven't implemented , we need to make it excluded. Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/acenv.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h index 66873297e9f5..1b010a859b8b 100644 --- a/arch/x86/include/asm/acenv.h +++ b/arch/x86/include/asm/acenv.h @@ -18,8 +18,6 @@ #define ACPI_FLUSH_CPU_CACHE() wbinvd() -#ifdef CONFIG_ACPI - int __acpi_acquire_global_lock(unsigned int *lock); int __acpi_release_global_lock(unsigned int *lock); @@ -44,6 +42,4 @@ int __acpi_release_global_lock(unsigned int *lock); : "=r"(n_hi), "=r"(n_lo) \ : "0"(n_hi), "1"(n_lo)) -#endif - #endif /* _ASM_X86_ACENV_H */ -- cgit v1.2.2 From 24e4a8c3e8868874835b0f1ad6dd417341e99822 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 16 Jul 2014 21:03:53 +0000 Subject: ktime: Kill non-scalar ktime_t implementation for 2038 The non-scalar ktime_t implementation is basically a timespec which has to be changed to support dates past 2038 on 32bit systems. This patch removes the non-scalar ktime_t implementation, forcing the scalar s64 nanosecond version on all architectures. 
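As a quick illustration of what the scalar representation means (a sketch using long-standing ktime helpers, not code from this patch): ktime_t becomes a plain s64 nanosecond count on every architecture, so conversions reduce to simple arithmetic rather than struct-member manipulation.

	ktime_t kt = ktime_set(5, 500 * NSEC_PER_MSEC);   /* 5.5 s            */
	s64 ns = ktime_to_ns(kt);                         /* 5500000000 ns    */
	struct timespec ts = ktime_to_timespec(kt);       /* { 5, 500000000 } */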
This may have additional performance overhead on some 32bit systems when converting between ktime_t and timespec structures, however the majority of 32bit systems (arm and i386) were already using scalar ktime_t, so no performance regressions will be seen on those platforms. On affected platforms, I'm open to finding optimizations, including avoiding converting to timespecs where possible. [ tglx: We can now cleanup the ktime_t.tv64 mess, but thats a different issue and we can throw a coccinelle script at it ] Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a8f749ef0fdc..7fa17b5ce668 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -111,7 +111,6 @@ config X86 select ARCH_CLOCKSOURCE_DATA select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) select GENERIC_TIME_VSYSCALL - select KTIME_SCALAR if X86_32 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HAVE_CONTEXT_TRACKING if X86_64 -- cgit v1.2.2 From bb0b58127c5add364cb597d58b1cf66eb279eae8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:52 +0000 Subject: x86: kvm: Use ktime_get_boot_ns() Use the new nanoseconds based interface and get rid of the timespec conversion dance. Signed-off-by: Thomas Gleixner Cc: Gleb Natapov Cc: kvm@vger.kernel.org Acked-by: Paolo Bonzini Signed-off-by: John Stultz --- arch/x86/kvm/x86.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f6449334ec45..65c430512132 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1109,11 +1109,7 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, static inline u64 get_kernel_ns(void) { - struct timespec ts; - - ktime_get_ts(&ts); - monotonic_to_bootbased(&ts); - return timespec_to_ns(&ts); + return ktime_get_boot_ns(); } #ifdef CONFIG_X86_64 -- cgit v1.2.2 From cbcf2dd3b3d4d990610259e8d878fc8dc1f17d80 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:54 +0000 Subject: x86: kvm: Make kvm_get_time_and_clockread() nanoseconds based Convert the relevant base data right away to nanoseconds instead of doing the conversion on every readout. Reduces text size by 160 bytes. 
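Summarized, the per-readout arithmetic the hunks below switch to is (names as in the patch; shown here only to make the diff easier to follow):

	ns  = gtod->nsec_base;          /* tk->xtime_nsec, already shifted   */
	ns += vgettsc(cycle_now);       /* (tsc - cycle_last) * clock.mult   */
	ns >>= gtod->clock.shift;
	ns += gtod->boot_ns;            /* precomputed boot-time offset, ns  */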
Signed-off-by: Thomas Gleixner Cc: Gleb Natapov Cc: kvm@vger.kernel.org Acked-by: Paolo Bonzini Signed-off-by: John Stultz --- arch/x86/kvm/x86.c | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 65c430512132..63832f5110b6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -984,9 +984,8 @@ struct pvclock_gtod_data { u32 shift; } clock; - /* open coded 'struct timespec' */ - u64 monotonic_time_snsec; - time_t monotonic_time_sec; + u64 boot_ns; + u64 nsec_base; }; static struct pvclock_gtod_data pvclock_gtod_data; @@ -994,6 +993,9 @@ static struct pvclock_gtod_data pvclock_gtod_data; static void update_pvclock_gtod(struct timekeeper *tk) { struct pvclock_gtod_data *vdata = &pvclock_gtod_data; + u64 boot_ns; + + boot_ns = ktime_to_ns(ktime_add(tk->base_mono, tk->offs_boot)); write_seqcount_begin(&vdata->seq); @@ -1004,17 +1006,8 @@ static void update_pvclock_gtod(struct timekeeper *tk) vdata->clock.mult = tk->mult; vdata->clock.shift = tk->shift; - vdata->monotonic_time_sec = tk->xtime_sec - + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->xtime_nsec - + (tk->wall_to_monotonic.tv_nsec - << tk->shift); - while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->shift)) { - vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->shift; - vdata->monotonic_time_sec++; - } + vdata->boot_ns = boot_ns; + vdata->nsec_base = tk->xtime_nsec; write_seqcount_end(&vdata->seq); } @@ -1371,23 +1364,22 @@ static inline u64 vgettsc(cycle_t *cycle_now) return v * gtod->clock.mult; } -static int do_monotonic(struct timespec *ts, cycle_t *cycle_now) +static int do_monotonic_boot(s64 *t, cycle_t *cycle_now) { + struct pvclock_gtod_data *gtod = &pvclock_gtod_data; unsigned long seq; - u64 ns; int mode; - struct pvclock_gtod_data *gtod = &pvclock_gtod_data; + u64 ns; - ts->tv_nsec = 0; do { seq = read_seqcount_begin(>od->seq); mode = gtod->clock.vclock_mode; - ts->tv_sec = gtod->monotonic_time_sec; - ns = gtod->monotonic_time_snsec; + ns = gtod->nsec_base; ns += vgettsc(cycle_now); ns >>= gtod->clock.shift; + ns += gtod->boot_ns; } while (unlikely(read_seqcount_retry(>od->seq, seq))); - timespec_add_ns(ts, ns); + *t = ns; return mode; } @@ -1395,19 +1387,11 @@ static int do_monotonic(struct timespec *ts, cycle_t *cycle_now) /* returns true if host is using tsc clocksource */ static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now) { - struct timespec ts; - /* checked again under seqlock below */ if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) return false; - if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC) - return false; - - monotonic_to_bootbased(&ts); - *kernel_ns = timespec_to_ns(&ts); - - return true; + return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC; } #endif -- cgit v1.2.2 From 09ec54429c6d10f87d1f084de53ae2c1c3a81108 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:12 +0000 Subject: clocksource: Move cycle_last validation to core code The only user of the cycle_last validation is the x86 TSC. In order to provide NMI safe accessor functions for clock monotonic and monotonic_raw we need to do that in the core. We can't do the TSC specific if (now < cycle_last) now = cycle_last; for the other wrapping around clocksources, but TSC has CLOCKSOURCE_MASK(64) which actually does not mask out anything so if now is less than cycle_last the subtraction will give a negative result. 
So we can check for that in clocksource_delta() and return 0 for that case. Implement and enable it for x86 Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/x86/Kconfig | 1 + arch/x86/kernel/tsc.c | 21 +++++++++------------ 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7fa17b5ce668..d08e061c187a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -109,6 +109,7 @@ config X86 select CLOCKSOURCE_WATCHDOG select GENERIC_CLOCKEVENTS select ARCH_CLOCKSOURCE_DATA + select CLOCKSOURCE_VALIDATE_LAST_CYCLE select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) select GENERIC_TIME_VSYSCALL select GENERIC_STRNCPY_FROM_USER diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 57e5ce126d5a..456c0e660c43 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -951,7 +951,7 @@ core_initcall(cpufreq_tsc); static struct clocksource clocksource_tsc; /* - * We compare the TSC to the cycle_last value in the clocksource + * We used to compare the TSC to the cycle_last value in the clocksource * structure to avoid a nasty time-warp. This can be observed in a * very small window right after one CPU updated cycle_last under * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which @@ -961,26 +961,23 @@ static struct clocksource clocksource_tsc; * due to the unsigned delta calculation of the time keeping core * code, which is necessary to support wrapping clocksources like pm * timer. + * + * This sanity check is now done in the core timekeeping code. + * checking the result of read_tsc() - cycle_last for being negative. + * That works because CLOCKSOURCE_MASK(64) does not mask out any bit. */ static cycle_t read_tsc(struct clocksource *cs) { - cycle_t ret = (cycle_t)get_cycles(); - - return ret >= clocksource_tsc.cycle_last ? - ret : clocksource_tsc.cycle_last; -} - -static void resume_tsc(struct clocksource *cs) -{ - if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) - clocksource_tsc.cycle_last = 0; + return (cycle_t)get_cycles(); } +/* + * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() + */ static struct clocksource clocksource_tsc = { .name = "tsc", .rating = 300, .read = read_tsc, - .resume = resume_tsc, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_MUST_VERIFY, -- cgit v1.2.2 From 4a0e637738f06673725792d74eed67f8779b62c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:13 +0000 Subject: clocksource: Get rid of cycle_last cycle_last was added to the clocksource to support the TSC validation. We moved that to the core code, so we can get rid of the extra copy. 
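The core-side check that replaces the old TSC-specific clamp lives outside arch/x86 and is therefore not visible in these hunks (the view is limited to 'arch/x86'); roughly, it amounts to the following sketch, which relies on CLOCKSOURCE_MASK(64) not masking anything off:

	static inline cycle_t clocksource_delta(cycle_t now, cycle_t last,
						cycle_t mask)
	{
		cycle_t ret = (now - last) & mask;

		/* A TSC read behind cycle_last shows up as a huge unsigned
		 * delta, i.e. negative when interpreted as s64 -- clamp it
		 * to 0 instead of jumping time forward. */
		return (s64) ret > 0 ? ret : 0;
	}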
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/x86/kernel/vsyscall_gtod.c | 2 +- arch/x86/kvm/x86.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index 9531fbb123ba..c3cb3c144591 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -32,7 +32,7 @@ void update_vsyscall(struct timekeeper *tk) /* copy vsyscall data */ vdata->vclock_mode = tk->clock->archdata.vclock_mode; - vdata->cycle_last = tk->clock->cycle_last; + vdata->cycle_last = tk->cycle_last; vdata->mask = tk->clock->mask; vdata->mult = tk->mult; vdata->shift = tk->shift; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63832f5110b6..7b25125f3f42 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1001,7 +1001,7 @@ static void update_pvclock_gtod(struct timekeeper *tk) /* copy pvclock gtod data */ vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->clock->cycle_last; + vdata->clock.cycle_last = tk->cycle_last; vdata->clock.mask = tk->clock->mask; vdata->clock.mult = tk->mult; vdata->clock.shift = tk->shift; -- cgit v1.2.2 From d28ede83791defee9a81e558540699dc46dbbe13 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:16 +0000 Subject: timekeeping: Create struct tk_read_base and use it in struct timekeeper The members of the new struct are the required ones for the new NMI safe accessor to clcok monotonic. In order to reuse the existing timekeeping code and to make the update of the fast NMI safe timekeepers a simple memcpy use the struct for the timekeeper as well and convert all users. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Mathieu Desnoyers Signed-off-by: John Stultz --- arch/x86/kernel/vsyscall_gtod.c | 23 ++++++++++++----------- arch/x86/kvm/x86.c | 14 +++++++------- 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index c3cb3c144591..c7d791f32b98 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -31,29 +31,30 @@ void update_vsyscall(struct timekeeper *tk) gtod_write_begin(vdata); /* copy vsyscall data */ - vdata->vclock_mode = tk->clock->archdata.vclock_mode; - vdata->cycle_last = tk->cycle_last; - vdata->mask = tk->clock->mask; - vdata->mult = tk->mult; - vdata->shift = tk->shift; + vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; + vdata->cycle_last = tk->tkr.cycle_last; + vdata->mask = tk->tkr.mask; + vdata->mult = tk->tkr.mult; + vdata->shift = tk->tkr.shift; vdata->wall_time_sec = tk->xtime_sec; - vdata->wall_time_snsec = tk->xtime_nsec; + vdata->wall_time_snsec = tk->tkr.xtime_nsec; vdata->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->xtime_nsec + vdata->monotonic_time_snsec = tk->tkr.xtime_nsec + ((u64)tk->wall_to_monotonic.tv_nsec - << tk->shift); + << tk->tkr.shift); while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->shift)) { + (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->shift; + ((u64)NSEC_PER_SEC) << tk->tkr.shift; vdata->monotonic_time_sec++; } vdata->wall_time_coarse_sec = tk->xtime_sec; - vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift); + vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> + tk->tkr.shift); 
vdata->monotonic_time_coarse_sec = vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7b25125f3f42..b7e57946d1c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -995,19 +995,19 @@ static void update_pvclock_gtod(struct timekeeper *tk) struct pvclock_gtod_data *vdata = &pvclock_gtod_data; u64 boot_ns; - boot_ns = ktime_to_ns(ktime_add(tk->base_mono, tk->offs_boot)); + boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot)); write_seqcount_begin(&vdata->seq); /* copy pvclock gtod data */ - vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->cycle_last; - vdata->clock.mask = tk->clock->mask; - vdata->clock.mult = tk->mult; - vdata->clock.shift = tk->shift; + vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode; + vdata->clock.cycle_last = tk->tkr.cycle_last; + vdata->clock.mask = tk->tkr.mask; + vdata->clock.mult = tk->tkr.mult; + vdata->clock.shift = tk->tkr.shift; vdata->boot_ns = boot_ns; - vdata->nsec_base = tk->xtime_nsec; + vdata->nsec_base = tk->tkr.xtime_nsec; write_seqcount_end(&vdata->seq); } -- cgit v1.2.2 From 2695fb552cbef1029aa025a98acb80cc51d66de5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 24 Jul 2014 16:38:21 -0700 Subject: net: filter: rename 'struct sock_filter_int' into 'struct bpf_insn' eBPF is used by socket filtering, seccomp and soon by tracing and exposed to userspace, therefore 'sock_filter_int' name is not accurate. Rename it to 'bpf_insn' Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 99bef86ed6df..71737a83f022 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -214,7 +214,7 @@ struct jit_context { static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { - struct sock_filter_int *insn = bpf_prog->insnsi; + struct bpf_insn *insn = bpf_prog->insnsi; int insn_cnt = bpf_prog->len; u8 temp[64]; int i; -- cgit v1.2.2 From 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:27:35 +0200 Subject: KVM: Rename and add argument to check_extension In preparation to make the check_extension function available to VM scope we add a struct kvm * argument to the function header and rename the function accordingly. It will still be called from the /dev/kvm fd, but with a NULL argument for struct kvm *. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5a8691b0ed76..5a62d91c96e7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2616,7 +2616,7 @@ out: return r; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; -- cgit v1.2.2 From 80a2670379b777ea45f2f6c73b2d2bc3f99066c8 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 30 Jul 2014 23:53:24 -0700 Subject: x86, apic: Remove x86_32_numa_cpu_node callback Since commit b5660ba76b41 ("x86, platforms: Remove NUMAQ") removed NUMAQ, the x86_32_numa_cpu_node() apic callback has been obsolete. Remove it. 
Signed-off-by: David Rientjes Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1407302348060.17503@chino.kir.corp.google.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/apic.h | 8 -------- arch/x86/kernel/apic/apic.c | 11 ----------- 2 files changed, 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b40ea7e4bae9..067c2f83e329 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -370,14 +370,6 @@ struct apic { * won't be applied properly during early boot in this case. */ int (*x86_32_early_logical_apicid)(int cpu); - - /* - * Optional method called from setup_local_APIC() after logical - * apicid is guaranteed to be known to initialize apicid -> node - * mapping if NUMA initialization hasn't done so already. Don't - * add new users. - */ - int (*x86_32_numa_cpu_node)(int cpu); #endif }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 6b35d308688c..d705bb7ed91b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1342,17 +1342,6 @@ void setup_local_APIC(void) /* always use the value from LDR */ early_per_cpu(x86_cpu_to_logical_apicid, cpu) = logical_smp_processor_id(); - - /* - * Some NUMA implementations (NUMAQ) don't initialize apicid to - * node mapping during NUMA init. Now that logical apicid is - * guaranteed to be known, give it another chance. This is already - * a bit too late - percpu allocation has already happened without - * proper NUMA affinity. - */ - if (apic->x86_32_numa_cpu_node) - set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu), - apic->x86_32_numa_cpu_node(cpu)); #endif /* -- cgit v1.2.2 From 6ab1b27c849106647c42b3ea0681a039552e24fa Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 30 Jul 2014 23:53:27 -0700 Subject: x86, apic: Replace trampoline physical addresses with defaults The trampoline_phys_{high,low} members of struct apic are always initialized to DEFAULT_TRAMPOLINE_PHYS_HIGH and TRAMPOLINE_PHYS_LOW, respectively. Hardwire the constants and remove the unneeded members. Signed-off-by: David Rientjes Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1407302348330.17503@chino.kir.corp.google.com Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/apic.h | 9 +++------ arch/x86/include/asm/smpboot_hooks.h | 10 +++++----- arch/x86/kernel/apic/apic_flat_64.c | 4 ---- arch/x86/kernel/apic/apic_noop.c | 4 ---- arch/x86/kernel/apic/apic_numachip.c | 2 -- arch/x86/kernel/apic/bigsmp_32.c | 3 --- arch/x86/kernel/apic/probe_32.c | 3 --- arch/x86/kernel/apic/x2apic_cluster.c | 2 -- arch/x86/kernel/apic/x2apic_phys.c | 2 -- arch/x86/kernel/apic/x2apic_uv_x.c | 2 -- 10 files changed, 8 insertions(+), 33 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 067c2f83e329..2201c262b00c 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -335,9 +335,6 @@ struct apic { /* wakeup_secondary_cpu */ int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); - int trampoline_phys_low; - int trampoline_phys_high; - bool wait_for_init_deassert; void (*smp_callin_clear_local_apic)(void); void (*inquire_remote_apic)(int apicid); @@ -480,10 +477,10 @@ static inline unsigned default_get_apic_id(unsigned long x) } /* - * Warm reset vector default position: + * Warm reset vector position: */ -#define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467 -#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 +#define TRAMPOLINE_PHYS_LOW 0x467 +#define TRAMPOLINE_PHYS_HIGH 0x469 #ifdef CONFIG_X86_64 extern void apic_send_IPI_self(int vector); diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h index 49adfd7bb4a4..0da7409f0bec 100644 --- a/arch/x86/include/asm/smpboot_hooks.h +++ b/arch/x86/include/asm/smpboot_hooks.h @@ -17,11 +17,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) spin_unlock_irqrestore(&rtc_lock, flags); local_flush_tlb(); pr_debug("1.\n"); - *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_high)) = - start_eip >> 4; + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = + start_eip >> 4; pr_debug("2.\n"); - *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_low)) = - start_eip & 0xf; + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = + start_eip & 0xf; pr_debug("3.\n"); } @@ -42,7 +42,7 @@ static inline void smpboot_restore_warm_reset_vector(void) CMOS_WRITE(0, 0xf); spin_unlock_irqrestore(&rtc_lock, flags); - *((volatile u32 *)phys_to_virt(apic->trampoline_phys_low)) = 0; + *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; } static inline void __init smpboot_setup_io_apic(void) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 7c1b29479513..7d56b65f3ddf 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -196,8 +196,6 @@ static struct apic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, @@ -312,8 +310,6 @@ static struct apic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 8c7c98249c20..c638efa6ad67 100644 --- 
a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -168,10 +168,6 @@ struct apic apic_noop = { .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, - /* should be safe */ - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index a5b45df8bc88..c32f8827e3a4 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -246,8 +246,6 @@ static const struct apic apic_numachip __refconst = { .send_IPI_self = numachip_send_IPI_self, .wakeup_secondary_cpu = numachip_wakeup_secondary, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, /* REMRD not supported */ diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index e4840aa7a255..74ffd3eb681e 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -196,9 +196,6 @@ static struct apic apic_bigsmp = { .send_IPI_all = bigsmp_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = true, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index cceb352c968c..64248c7149e3 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -116,9 +116,6 @@ static struct apic apic_default = { .send_IPI_all = default_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = true, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index e66766bf1641..8b3200cc57e3 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -277,8 +277,6 @@ static struct apic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 6d600ebf6c12..14c189151910 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -131,8 +131,6 @@ static struct apic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 293b41df54ef..58f098880058 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -394,8 +394,6 @@ static struct apic __refdata apic_x2apic_uv_x = { 
.send_IPI_self = uv_send_IPI_self, .wakeup_secondary_cpu = uv_wakeup_secondary, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, -- cgit v1.2.2 From 300eddf967920d35affa75db77c50c0fa493446a Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 30 Jul 2014 23:53:30 -0700 Subject: x86, apic: Remove smp_callin_clear_local_apic callback Since commit b5660ba76b41 ("x86, platforms: Remove NUMAQ") removed NUMAQ, the smp_callin_clear_local_apic() apic callback has been obsolete. Remove it. Signed-off-by: David Rientjes Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1407302349040.17503@chino.kir.corp.google.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/apic_noop.c | 1 - arch/x86/kernel/apic/apic_numachip.c | 1 - arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/probe_32.c | 1 - arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/apic/x2apic_uv_x.c | 1 - arch/x86/kernel/smpboot.c | 4 ---- 10 files changed, 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2201c262b00c..b8c1380690e7 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -336,7 +336,6 @@ struct apic { int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); bool wait_for_init_deassert; - void (*smp_callin_clear_local_apic)(void); void (*inquire_remote_apic)(int apicid); /* apic ops */ diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 7d56b65f3ddf..a790497902e4 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -197,7 +197,6 @@ static struct apic apic_flat = { .send_IPI_self = apic_send_IPI_self, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, @@ -311,7 +310,6 @@ static struct apic apic_physflat = { .send_IPI_self = apic_send_IPI_self, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index c638efa6ad67..f05b4f090214 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -169,7 +169,6 @@ struct apic apic_noop = { .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = noop_apic_read, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index c32f8827e3a4..c0d9165be0d7 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -247,7 +247,6 @@ static const struct apic apic_numachip __refconst = { .wakeup_secondary_cpu = numachip_wakeup_secondary, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, /* REMRD not supported */ .read = native_apic_mem_read, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 74ffd3eb681e..eb159163064c 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -197,7 +197,6 @@ static struct apic apic_bigsmp = { 
.send_IPI_self = default_send_IPI_self, .wait_for_init_deassert = true, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 64248c7149e3..5b617a92af0e 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -117,7 +117,6 @@ static struct apic apic_default = { .send_IPI_self = default_send_IPI_self, .wait_for_init_deassert = true, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8b3200cc57e3..d96cab15f6f4 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -278,7 +278,6 @@ static struct apic apic_x2apic_cluster = { .send_IPI_self = x2apic_send_IPI_self, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 14c189151910..73d0fee6d0cd 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -132,7 +132,6 @@ static struct apic apic_x2apic_phys = { .send_IPI_self = x2apic_send_IPI_self, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 58f098880058..cc5d40e6c85d 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -395,7 +395,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .wakeup_secondary_cpu = uv_wakeup_secondary, .wait_for_init_deassert = false, - .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5492798930ef..f0f3b194e9f9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -168,10 +168,6 @@ static void smp_callin(void) * CPU, first the APIC. (this is probably redundant on most * boards) */ - - pr_debug("CALLIN, before setup_local_APIC()\n"); - if (apic->smp_callin_clear_local_apic) - apic->smp_callin_clear_local_apic(); setup_local_APIC(); end_local_APIC_setup(); -- cgit v1.2.2 From c460b5d34018d71fdeb8540620690883db3f959b Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 30 Jul 2014 23:53:34 -0700 Subject: x86, apic: Remove mps_oem_check callback Since commit b5660ba76b41 ("x86, platforms: Remove NUMAQ") removed NUMAQ, the mps_oem_check() apic callback has been obsolete. Remove it. This allows generic_mps_oem_check() to be removed as well. Signed-off-by: David Rientjes Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1407302349390.17503@chino.kir.corp.google.com Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/apic.h | 7 ------- arch/x86/include/asm/mpspec.h | 2 -- arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/apic_noop.c | 2 -- arch/x86/kernel/apic/apic_numachip.c | 1 - arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/probe_32.c | 25 +------------------------ arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/apic/x2apic_uv_x.c | 1 - arch/x86/kernel/mpparse.c | 3 --- 11 files changed, 1 insertion(+), 45 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b8c1380690e7..86d155026c3e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -309,13 +309,6 @@ struct apic { void (*enable_apic_mode)(void); int (*phys_pkg_id)(int cpuid_apic, int index_msb); - /* - * When one of the next two hooks returns 1 the apic - * is switched to this. Essentially they are additional - * probe functions: - */ - int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid); - unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); unsigned long apic_id_mask; diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 7bef40a01a1d..b07233b64578 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -150,6 +150,4 @@ static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) extern physid_mask_t phys_cpu_present_map; -extern int generic_mps_oem_check(struct mpc_table *, char *, char *); - #endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index a790497902e4..86c4203af12e 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -182,7 +182,6 @@ static struct apic apic_flat = { .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = flat_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, @@ -295,7 +294,6 @@ static struct apic apic_physflat = { .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = flat_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index f05b4f090214..6b25735cc1cd 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -152,8 +152,6 @@ struct apic apic_noop = { .phys_pkg_id = noop_phys_pkg_id, - .mps_oem_check = NULL, - .get_apic_id = noop_get_apic_id, .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index c0d9165be0d7..5f835003634a 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -231,7 +231,6 @@ static const struct apic apic_numachip __refconst = { .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = numachip_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index eb159163064c..90f6427687ea 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -182,7 +182,6 @@ static struct apic apic_bigsmp = { .check_phys_apicid_present = 
bigsmp_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = bigsmp_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 5b617a92af0e..b4211f69e6a2 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -102,7 +102,6 @@ static struct apic apic_default = { .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = default_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = default_get_apic_id, .set_apic_id = NULL, @@ -210,29 +209,7 @@ void __init generic_apic_probe(void) printk(KERN_INFO "Using APIC driver %s\n", apic->name); } -/* These functions can switch the APIC even after the initial ->probe() */ - -int __init -generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - struct apic **drv; - - for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { - if (!((*drv)->mps_oem_check)) - continue; - if (!(*drv)->mps_oem_check(mpc, oem, productid)) - continue; - - if (!cmdline_apic) { - apic = *drv; - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - apic->name); - } - return 1; - } - return 0; -} - +/* This function can switch the APIC even after the initial ->probe() */ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { struct apic **drv; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index d96cab15f6f4..7e4ac5587af0 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -263,7 +263,6 @@ static struct apic apic_x2apic_cluster = { .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = x2apic_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 73d0fee6d0cd..01136e64c380 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -117,7 +117,6 @@ static struct apic apic_x2apic_phys = { .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = x2apic_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index cc5d40e6c85d..119c55377640 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -379,7 +379,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = uv_phys_pkg_id, - .mps_oem_check = NULL, .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index fde86d2b79f8..2d2a237f2c73 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -216,9 +216,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) if (!smp_check_mpc(mpc, oem, str)) return 0; -#ifdef CONFIG_X86_32 - generic_mps_oem_check(mpc, oem, str); -#endif /* Initialize the lapic mapping */ if (!acpi_lapic) register_lapic_address(mpc->lapic); -- cgit v1.2.2 From 658ffd7e6f5ce62e15df99df5f9e181d76ffda8e Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 30 Jul 2014 23:53:37 -0700 Subject: x86, apic: Remove 
check_apicid_present callback The check_apicid_present() apic callback is never called, so remove it and functions that implement it. Signed-off-by: David Rientjes Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1407302350160.17503@chino.kir.corp.google.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/apic.h | 6 ------ arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/apic_noop.c | 6 ------ arch/x86/kernel/apic/apic_numachip.c | 1 - arch/x86/kernel/apic/bigsmp_32.c | 6 ------ arch/x86/kernel/apic/probe_32.c | 1 - arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/apic/x2apic_uv_x.c | 1 - 9 files changed, 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 86d155026c3e..e1c0c2d4a961 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -292,7 +292,6 @@ struct apic { int dest_logical; unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); - unsigned long (*check_apicid_present)(int apicid); void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, const struct cpumask *mask); @@ -608,11 +607,6 @@ static inline unsigned long default_check_apicid_used(physid_mask_t *map, int ap return physid_isset(apicid, *map); } -static inline unsigned long default_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { *retmap = *phys_map; diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 86c4203af12e..a8ed9ac1259b 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -168,7 +168,6 @@ static struct apic apic_flat = { .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, @@ -279,7 +278,6 @@ static struct apic apic_physflat = { .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = default_vector_allocation_domain, /* not needed, but shouldn't hurt: */ diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 6b25735cc1cd..8c165d9c204c 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -94,11 +94,6 @@ static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) return physid_isset(apicid, *map); } -static unsigned long noop_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, const struct cpumask *mask) { @@ -134,7 +129,6 @@ struct apic apic_noop = { .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = noop_check_apicid_used, - .check_apicid_present = noop_check_apicid_present, .vector_allocation_domain = noop_vector_allocation_domain, .init_apic_ldr = noop_init_apic_ldr, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 5f835003634a..b49eb4047ef6 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -217,7 +217,6 @@ static const struct apic apic_numachip __refconst = { .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = 
default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 90f6427687ea..163ad45331d8 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -31,11 +31,6 @@ static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) return 0; } -static unsigned long bigsmp_check_apicid_present(int bit) -{ - return 1; -} - static int bigsmp_early_logical_apicid(int cpu) { /* on bigsmp, logical apicid is the same as physical */ @@ -168,7 +163,6 @@ static struct apic apic_bigsmp = { .disable_esr = 1, .dest_logical = 0, .check_apicid_used = bigsmp_check_apicid_used, - .check_apicid_present = bigsmp_check_apicid_present, .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = bigsmp_init_apic_ldr, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index b4211f69e6a2..4d92ef883499 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -88,7 +88,6 @@ static struct apic apic_default = { .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, - .check_apicid_present = default_check_apicid_present, .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = default_init_apic_ldr, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 7e4ac5587af0..d7bbe89b18da 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -249,7 +249,6 @@ static struct apic apic_x2apic_cluster = { .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = cluster_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 01136e64c380..e23bec9ecd71 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -103,7 +103,6 @@ static struct apic apic_x2apic_phys = { .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 119c55377640..633732f10d5a 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -365,7 +365,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .check_apicid_present = NULL, .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, -- cgit v1.2.2 From 4c59f3e63d5a0673af8abbce64467a6b497c94d2 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 30 Jul 2014 23:53:40 -0700 Subject: x86, apic: Replace noop_check_apicid_used noop_check_apicid_used() has the same implementation as default_check_apicid_used() in the standard header file, so replace the former with the latter. Signed-off-by: David Rientjes Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1407302350450.17503@chino.kir.corp.google.com Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/apic/apic_noop.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 8c165d9c204c..c480dac40f90 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -89,11 +89,6 @@ static const struct cpumask *noop_target_cpus(void) return cpumask_of(0); } -static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) -{ - return physid_isset(apicid, *map); -} - static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, const struct cpumask *mask) { @@ -128,7 +123,7 @@ struct apic apic_noop = { .target_cpus = noop_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = noop_check_apicid_used, + .check_apicid_used = default_check_apicid_used, .vector_allocation_domain = noop_vector_allocation_domain, .init_apic_ldr = noop_init_apic_ldr, -- cgit v1.2.2 From e76661ba09353efd04e3435ac15bb9444f5cf1fa Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 30 Jul 2014 23:53:43 -0700 Subject: x86, apic: Remove multi_timer_check callback Since commit b5660ba76b41 ("x86, platforms: Remove NUMAQ") removed NUMAQ, the multi_timer_check() apic callback has been obsolete. Remove it. Signed-off-by: David Rientjes Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1407302351120.17503@chino.kir.corp.google.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/apic_noop.c | 1 - arch/x86/kernel/apic/apic_numachip.c | 1 - arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/io_apic.c | 16 ---------------- arch/x86/kernel/apic/probe_32.c | 1 - arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/apic/x2apic_uv_x.c | 1 - 10 files changed, 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index e1c0c2d4a961..d8da5fc6a54d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -300,7 +300,6 @@ struct apic { void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); void (*setup_apic_routing)(void); - int (*multi_timer_check)(int apic, int irq); int (*cpu_present_to_apicid)(int mps_cpu); void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); void (*setup_portio_remap)(void); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index a8ed9ac1259b..f23b5e8d1bbb 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -174,7 +174,6 @@ static struct apic apic_flat = { .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, @@ -285,7 +284,6 @@ static struct apic apic_physflat = { .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index c480dac40f90..66a91e5fc23d 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -130,7 +130,6 @@ struct apic apic_noop = { .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = NULL, - .multi_timer_check = NULL, 
.cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index b49eb4047ef6..3d7f6b2e312d 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -223,7 +223,6 @@ static const struct apic apic_numachip __refconst = { .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 163ad45331d8..fa99280ee90e 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -169,7 +169,6 @@ static struct apic apic_bigsmp = { .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .setup_apic_routing = bigsmp_setup_apic_routing, - .multi_timer_check = NULL, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a44dce8cc559..29290f554e79 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3882,14 +3882,6 @@ int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, struct mp_pin_info *info = mp_pin_info(ioapic, hwirq); struct io_apic_irq_attr attr; - /* - * Skip the timer IRQ if there's a quirk handler installed and if it - * returns 1: - */ - if (apic->multi_timer_check && - apic->multi_timer_check(ioapic, virq)) - return 0; - /* Get default attribute if not set by caller yet */ if (!info->set) { u32 gsi = mp_pin_to_gsi(ioapic, hwirq); @@ -3919,14 +3911,6 @@ void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq) int ioapic = (int)(long)domain->host_data; int pin = (int)data->hwirq; - /* - * Skip the timer IRQ if there's a quirk handler installed and if it - * returns 1: - */ - if (apic->multi_timer_check && - apic->multi_timer_check(ioapic, virq)) - return; - ioapic_mask_entry(ioapic, pin); __remove_pin_from_irq(cfg, ioapic, pin); WARN_ON(cfg->irq_2_pin != NULL); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 4d92ef883499..a445842f24fe 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -94,7 +94,6 @@ static struct apic apic_default = { .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = setup_apic_flat_routing, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index d7bbe89b18da..88eeaa90c91c 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -255,7 +255,6 @@ static struct apic apic_x2apic_cluster = { .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index e23bec9ecd71..29614ae682e7 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -109,7 +109,6 @@ static struct apic apic_x2apic_phys = { .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - 
.multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 633732f10d5a..53760128fd2b 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -371,7 +371,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, - .multi_timer_check = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, -- cgit v1.2.2 From 11a8318ef5a69cdb9be61f726d6e078d70af6129 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 30 Jul 2014 23:53:47 -0700 Subject: x86, apic: Remove setup_portio_remap callback Since commit b5660ba76b41 ("x86, platforms: Remove NUMAQ") removed NUMAQ, the setup_portio_remap() apic callback has been obsolete. Remove it. Signed-off-by: David Rientjes Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1407302351480.17503@chino.kir.corp.google.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/apic_noop.c | 1 - arch/x86/kernel/apic/apic_numachip.c | 1 - arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/probe_32.c | 1 - arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/apic/x2apic_uv_x.c | 1 - arch/x86/kernel/smpboot.c | 4 ---- 10 files changed, 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index d8da5fc6a54d..e776ed29b74c 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -302,7 +302,6 @@ struct apic { void (*setup_apic_routing)(void); int (*cpu_present_to_apicid)(int mps_cpu); void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); - void (*setup_portio_remap)(void); int (*check_phys_apicid_present)(int phys_apicid); void (*enable_apic_mode)(void); int (*phys_pkg_id)(int cpuid_apic, int index_msb); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index f23b5e8d1bbb..67d7a9da342c 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -176,7 +176,6 @@ static struct apic apic_flat = { .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = flat_phys_pkg_id, @@ -286,7 +285,6 @@ static struct apic apic_physflat = { .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = flat_phys_pkg_id, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 66a91e5fc23d..fe5d5e6b479e 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -134,7 +134,6 @@ struct apic apic_noop = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 
3d7f6b2e312d..6b55be3596b5 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -225,7 +225,6 @@ static const struct apic apic_numachip __refconst = { .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = numachip_phys_pkg_id, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index fa99280ee90e..349465aa2454 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -171,7 +171,6 @@ static struct apic apic_bigsmp = { .setup_apic_routing = bigsmp_setup_apic_routing, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, - .setup_portio_remap = NULL, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = bigsmp_phys_pkg_id, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index a445842f24fe..45ad6d894bb3 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -96,7 +96,6 @@ static struct apic apic_default = { .setup_apic_routing = setup_apic_flat_routing, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = default_phys_pkg_id, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 88eeaa90c91c..ac70128aa9e9 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -257,7 +257,6 @@ static struct apic apic_x2apic_cluster = { .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = x2apic_phys_pkg_id, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 29614ae682e7..90eb77f81147 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -111,7 +111,6 @@ static struct apic apic_x2apic_phys = { .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = x2apic_phys_pkg_id, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 53760128fd2b..480c05d8df22 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -373,7 +373,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = uv_phys_pkg_id, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f0f3b194e9f9..2d872e08fab9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1139,10 +1139,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) enable_IO_APIC(); bsp_end_local_APIC_setup(); - - if (apic->setup_portio_remap) 
- apic->setup_portio_remap(); - smpboot_setup_io_apic(); /* * Set up local APIC timer on boot CPU. -- cgit v1.2.2 From 2f078b9cb8798cdabb7c2ff24b0b683eea546f96 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 30 Jul 2014 23:53:51 -0700 Subject: x86, apic: Remove enable_apic_mode callback The enable_apic_mode() apic callback is never called, so remove it. Signed-off-by: David Rientjes Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1407302352320.17503@chino.kir.corp.google.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic.c | 2 -- arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/apic_noop.c | 1 - arch/x86/kernel/apic/apic_numachip.c | 1 - arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/probe_32.c | 1 - arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/apic/x2apic_uv_x.c | 1 - 10 files changed, 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index e776ed29b74c..c31fded99cf4 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -303,7 +303,6 @@ struct apic { int (*cpu_present_to_apicid)(int mps_cpu); void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); int (*check_phys_apicid_present)(int phys_apicid); - void (*enable_apic_mode)(void); int (*phys_pkg_id)(int cpuid_apic, int index_msb); unsigned int (*get_apic_id)(unsigned long x); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d705bb7ed91b..67760275544b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2042,8 +2042,6 @@ void __init connect_bsp_APIC(void) imcr_pic_to_apic(); } #endif - if (apic->enable_apic_mode) - apic->enable_apic_mode(); } /** diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 67d7a9da342c..de918c410eae 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -177,7 +177,6 @@ static struct apic apic_flat = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = flat_phys_pkg_id, .get_apic_id = flat_get_apic_id, @@ -286,7 +285,6 @@ static struct apic apic_physflat = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = flat_phys_pkg_id, .get_apic_id = flat_get_apic_id, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index fe5d5e6b479e..b205cdbdbe6a 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -135,7 +135,6 @@ struct apic apic_noop = { .apicid_to_cpu_present = physid_set_mask_of_physid, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = noop_phys_pkg_id, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 6b55be3596b5..ae915391ebec 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -226,7 +226,6 @@ static const struct apic apic_numachip __refconst = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = numachip_phys_pkg_id, 
.get_apic_id = get_apic_id, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 349465aa2454..c4a8d63f8220 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -172,7 +172,6 @@ static struct apic apic_bigsmp = { .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = bigsmp_phys_pkg_id, .get_apic_id = bigsmp_get_apic_id, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 45ad6d894bb3..bda488680dbc 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -97,7 +97,6 @@ static struct apic apic_default = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = default_phys_pkg_id, .get_apic_id = default_get_apic_id, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index ac70128aa9e9..6ce600f9bc78 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -258,7 +258,6 @@ static struct apic apic_x2apic_cluster = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = x2apic_phys_pkg_id, .get_apic_id = x2apic_get_apic_id, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 90eb77f81147..6fae733e9194 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -112,7 +112,6 @@ static struct apic apic_x2apic_phys = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = x2apic_phys_pkg_id, .get_apic_id = x2apic_get_apic_id, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 480c05d8df22..004f017aa7b9 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -374,7 +374,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, .phys_pkg_id = uv_phys_pkg_id, .get_apic_id = x2apic_get_apic_id, -- cgit v1.2.2 From 3fbdf631b765571b18ee51e073d8f46958e98ceb Mon Sep 17 00:00:00 2001 From: Matt Rushton Date: Sat, 19 Jul 2014 17:01:34 -0700 Subject: xen/setup: Remove Identity Map Debug Message Removing a debug message for setting the identity map since it becomes rather noisy after rework of the identity map code. Signed-off-by: Matthew Rushton Signed-off-by: David Vrabel --- arch/x86/xen/p2m.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 9bb3d82ffec8..3172692381ae 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -841,10 +841,9 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, pfn = ALIGN(pfn, P2M_PER_PAGE); } - if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), + WARN((pfn - pfn_s) != (pfn_e - pfn_s), "Identity mapping failed. 
We are %ld short of 1-1 mappings!\n", - (pfn_e - pfn_s) - (pfn - pfn_s))) - printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn); + (pfn_e - pfn_s) - (pfn - pfn_s)); return pfn - pfn_s; } -- cgit v1.2.2 From 5e3bf215f4f2efc0af89e6dbc5da789744aeb5d7 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 1 Aug 2014 14:47:56 -0700 Subject: x86/apic/vsmp: Make is_vsmp_box() static Since checkin 411cf9ee2946 x86, vsmp: Remove is_vsmp_box() from apic_is_clustered_box() ... is_vsmp_box() is only used in vsmp_64.c and does not have any header file declaring it, so make it explicitly static. Reported-by: kbuild test robot Cc: Shai Fultheim Cc: Oren Twaig Link: http://lkml.kernel.org/r/1404036068-11674-1-git-send-email-oren@scalemp.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vsmp_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index b99b9ad8540c..ee22c1d93ae5 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -152,7 +152,7 @@ static void __init detect_vsmp_box(void) is_vsmp = 1; } -int is_vsmp_box(void) +static int is_vsmp_box(void) { if (is_vsmp != -1) return is_vsmp; @@ -166,7 +166,7 @@ int is_vsmp_box(void) static void __init detect_vsmp_box(void) { } -int is_vsmp_box(void) +static int is_vsmp_box(void) { return 0; } -- cgit v1.2.2 From 8fb575ca396bc31d9fa99c26336e2432b41d1bfc Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jul 2014 20:34:15 -0700 Subject: net: filter: rename sk_convert_filter() -> bpf_convert_filter() to indicate that this function is converting classic BPF into eBPF and not related to sockets Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 71737a83f022..e2ecc1380b3d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -235,7 +235,7 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, /* mov qword ptr [rbp-X],rbx */ EMIT3_off32(0x48, 0x89, 0x9D, -stacksize); - /* sk_convert_filter() maps classic BPF register X to R7 and uses R8 + /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8 * as temporary, so all tcpdump filters need to spill/fill R7(r13) and * R8(r14). 
R9(r15) spill could be made conditional, but there is only * one 'bpf_error' return path out of helper functions inside bpf_jit.S -- cgit v1.2.2 From 7ae457c1e5b45a1b826fad9d62b32191d2bdcfdb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jul 2014 20:34:16 -0700 Subject: net: filter: split 'struct sk_filter' into socket and bpf parts clean up names related to socket filtering and bpf in the following way: - everything that deals with sockets keeps 'sk_*' prefix - everything that is pure BPF is changed to 'bpf_*' prefix split 'struct sk_filter' into struct sk_filter { atomic_t refcnt; struct rcu_head rcu; struct bpf_prog *prog; }; and struct bpf_prog { u32 jited:1, len:31; struct sock_fprog_kern *orig_prog; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); union { struct sock_filter insns[0]; struct bpf_insn insnsi[0]; struct work_struct work; }; }; so that 'struct bpf_prog' can be used independent of sockets and cleans up 'unattached' bpf use cases split SK_RUN_FILTER macro into: SK_RUN_FILTER to be used with 'struct sk_filter *' and BPF_PROG_RUN to be used with 'struct bpf_prog *' __sk_filter_release(struct sk_filter *) gains __bpf_prog_release(struct bpf_prog *) helper function also perform related renames for the functions that work with 'struct bpf_prog *', since they're on the same lines: sk_filter_size -> bpf_prog_size sk_filter_select_runtime -> bpf_prog_select_runtime sk_filter_free -> bpf_prog_free sk_unattached_filter_create -> bpf_prog_create sk_unattached_filter_destroy -> bpf_prog_destroy sk_store_orig_filter -> bpf_prog_store_orig_filter sk_release_orig_filter -> bpf_release_orig_filter __sk_migrate_filter -> bpf_migrate_filter __sk_prepare_filter -> bpf_prepare_filter API for attaching classic BPF to a socket stays the same: sk_attach_filter(prog, struct sock *)/sk_detach_filter(struct sock *) and SK_RUN_FILTER(struct sk_filter *, ctx) to execute a program which is used by sockets, tun, af_packet API for 'unattached' BPF programs becomes: bpf_prog_create(struct bpf_prog **)/bpf_prog_destroy(struct bpf_prog *) and BPF_PROG_RUN(struct bpf_prog *, ctx) to execute a program which is used by isdn, ppp, team, seccomp, ptp, xt_bpf, cls_bpf, test_bpf Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- arch/x86/net/bpf_jit_comp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e2ecc1380b3d..5c8cb8043c5a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -211,7 +211,7 @@ struct jit_context { bool seen_ld_abs; }; -static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, +static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { struct bpf_insn *insn = bpf_prog->insnsi; @@ -841,7 +841,7 @@ common_load: ctx->seen_ld_abs = true; /* By design x64 JIT should support all BPF instructions * This error will be seen if new instruction was added * to interpreter, but not to JIT - * or if there is junk in sk_filter + * or if there is junk in bpf_prog */ pr_err("bpf_jit: unknown opcode %02x\n", insn->code); return -EINVAL; @@ -862,11 +862,11 @@ common_load: ctx->seen_ld_abs = true; return proglen; } -void bpf_jit_compile(struct sk_filter *prog) +void bpf_jit_compile(struct bpf_prog *prog) { } -void bpf_int_jit_compile(struct sk_filter *prog) +void bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; int proglen, oldproglen = 0; @@ -932,7 +932,7 @@ out: static void bpf_jit_free_deferred(struct work_struct *work) { - struct sk_filter *fp = container_of(work, struct sk_filter, work); + struct bpf_prog *fp = container_of(work, struct bpf_prog, work); unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; @@ -941,7 +941,7 @@ static void bpf_jit_free_deferred(struct work_struct *work) kfree(fp); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) { INIT_WORK(&fp->work, bpf_jit_free_deferred); -- cgit v1.2.2 From b16d8c231e4d03fefc7de1b8b62bad5659ee8070 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 5 Aug 2014 00:12:19 +0100 Subject: x86/efi: Fix 3DNow optimization build failure in EFI stub Building a 32-bit kernel with CONFIG_X86_USE_3DNOW and CONFIG_EFI_STUB leads to the following build error, drivers/firmware/efi/libstub/lib.a(efi-stub-helper.o): In function `efi_relocate_kernel': efi-stub-helper.c:(.text+0xda5): undefined reference to `_mmx_memcpy' This is due to the fact that the EFI boot stub pulls in the 3DNow optimized versions of the memcpy() prototype from arch/x86/include/asm/string_32.h, even though the _mmx_memcpy() implementation isn't available in the EFI stub. For now, predicate CONFIG_EFI_STUB on !CONFIG_X86_USE_3DNOW. This is most definitely a temporary fix. A complete solution will involve selectively including kernel headers/symbols into the early-boot execution environment of the EFI boot stub, i.e. something analogous to the way that the _SETUP symbol is used. Previous attempts have been made to fix this kind of problem, though none seem to have ever been merged, http://lkml.kernel.org/r/20120329104822.GA17233@x1.osrc.amd.com Clearly, this problem has been around for a long time. Reported-by: Ingo Molnar Cc: Borislav Petkov Signed-off-by: Matt Fleming Link: http://lkml.kernel.org/r/1407193939-27813-1-git-send-email-matt@console-pimps.org Signed-off-by: H. 
Peter Anvin --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 801ed36c2e49..7ea8aaaab7fc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1536,7 +1536,7 @@ config EFI config EFI_STUB bool "EFI stub support" - depends on EFI + depends on EFI && !X86_USE_3DNOW ---help--- This kernel feature allows a bzImage to be loaded directly by EFI firmware without the use of a bootloader. -- cgit v1.2.2 From 297e21053a52f060944e9f0de4c64fad9bcd72fc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:13 +1000 Subject: KVM: Give IRQFD its own separate enabling Kconfig option Currently, the IRQFD code is conditional on CONFIG_HAVE_KVM_IRQ_ROUTING. So that we can have the IRQFD code compiled in without having the IRQ routing code, this creates a new CONFIG_HAVE_KVM_IRQFD, makes the IRQFD code conditional on it instead of CONFIG_HAVE_KVM_IRQ_ROUTING, and makes all the platforms that currently select HAVE_KVM_IRQ_ROUTING also select HAVE_KVM_IRQFD. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 287e4c85fff9..f9d16ff56c6b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,6 +27,7 @@ config KVM select MMU_NOTIFIER select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE -- cgit v1.2.2 From f3380ca5d7edb5e31932998ab2e29dfdce39c5ed Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 5 Aug 2014 12:42:23 +0800 Subject: KVM: nVMX: Fix nested vmexit ack intr before load vmcs01 An external interrupt will cause a vmexit with reason "external interrupt" when L2 is running. L1 will pick up the interrupt through vmcs12 if L1 set the ack interrupt bit. Commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info if L1 asks us to) retrieves the interrupt that belongs to L1 before vmcs01 is loaded. This will lead to problems in the next patch, which would write to SVI of vmcs02 instead of vmcs01 (SVI of vmcs02 doesn't make sense because L2 runs without APICv). Reviewed-by: Paolo Bonzini Tested-by: Liu, RongrongX Tested-by: Felipe Reyes Fixes: 77b0f5d67ff2781f36831cba79674c3e97bd7acf Cc: stable@vger.kernel.org Signed-off-by: Wanpeng Li [Move tracepoint as well. 
- Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e618f34bde2d..bfe11cf124a1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8754,6 +8754,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, exit_qualification); + vmx_load_vmcs01(vcpu); + if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) && nested_exit_intr_ack_set(vcpu)) { int irq = kvm_cpu_get_interrupt(vcpu); @@ -8769,8 +8771,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs12->vm_exit_intr_error_code, KVM_ISA_VMX); - vmx_load_vmcs01(vcpu); - vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); vmx_segment_cache_clear(vmx); -- cgit v1.2.2 From 56cc2406d68c0f09505c389e276f27a99f495cbd Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 5 Aug 2014 12:42:24 +0800 Subject: KVM: nVMX: fix "acknowledge interrupt on exit" when APICv is in use After commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info if L1 asks us to), "Acknowledge interrupt on exit" behavior can be emulated. To do so, KVM will ask the APIC for the interrupt vector if during a nested vmexit if VM_EXIT_ACK_INTR_ON_EXIT is set. With APICv, kvm_get_apic_interrupt would return -1 and give the following WARNING: Call Trace: [] dump_stack+0x49/0x5e [] warn_slowpath_common+0x7c/0x96 [] ? nested_vmx_vmexit+0xa4/0x233 [kvm_intel] [] warn_slowpath_null+0x15/0x17 [] nested_vmx_vmexit+0xa4/0x233 [kvm_intel] [] ? nested_vmx_exit_handled+0x6a/0x39e [kvm_intel] [] ? kvm_apic_has_interrupt+0x80/0xd5 [kvm] [] vmx_check_nested_events+0xc3/0xd3 [kvm_intel] [] inject_pending_event+0xd0/0x16e [kvm] [] vcpu_enter_guest+0x319/0x704 [kvm] To fix this, we cannot rely on the processor's virtual interrupt delivery, because "acknowledge interrupt on exit" must only update the virtual ISR/PPR/IRR registers (and SVI, which is just a cache of the virtual ISR) but it should not deliver the interrupt through the IDT. Thus, KVM has to deliver the interrupt "by hand", similar to the treatment of EOI in commit fc57ac2c9ca8 (KVM: lapic: sync highest ISR to hardware apic on EOI, 2014-05-14). The patch modifies kvm_cpu_get_interrupt to always acknowledge an interrupt; there are only two callers, and the other is not affected because it is never reached with kvm_apic_vid_enabled() == true. Then it modifies apic_set_isr and apic_clear_irr to update SVI and RVI in addition to the registers. 
Suggested-by: Paolo Bonzini Suggested-by: "Zhang, Yang Z" Tested-by: Liu, RongrongX Tested-by: Felipe Reyes Fixes: 77b0f5d67ff2781f36831cba79674c3e97bd7acf Cc: stable@vger.kernel.org Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq.c | 2 +- arch/x86/kvm/lapic.c | 52 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 40 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index bd0da433e6d7..a1ec6a50a05a 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) vector = kvm_cpu_get_extint(v); - if (kvm_apic_vid_enabled(v->kvm) || vector != -1) + if (vector != -1) return vector; /* PIC */ return kvm_get_apic_interrupt(v); /* APIC */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3855103f71fd..08e8a899e005 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -352,25 +352,46 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) { - apic->irr_pending = false; + struct kvm_vcpu *vcpu; + + vcpu = apic->vcpu; + apic_clear_vector(vec, apic->regs + APIC_IRR); - if (apic_search_irr(apic) != -1) - apic->irr_pending = true; + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + /* try to update RVI */ + kvm_make_request(KVM_REQ_EVENT, vcpu); + else { + vec = apic_search_irr(apic); + apic->irr_pending = (vec != -1); + } } static inline void apic_set_isr(int vec, struct kvm_lapic *apic) { - /* Note that we never get here with APIC virtualization enabled. */ + struct kvm_vcpu *vcpu; + + if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) + return; + + vcpu = apic->vcpu; - if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) - ++apic->isr_count; - BUG_ON(apic->isr_count > MAX_APIC_VECTOR); /* - * ISR (in service register) bit is set when injecting an interrupt. - * The highest vector is injected. Thus the latest bit set matches - * the highest bit in ISR. + * With APIC virtualization enabled, all caching is disabled + * because the processor can modify ISR under the hood. Instead + * just set SVI. */ - apic->highest_isr_cache = vec; + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec); + else { + ++apic->isr_count; + BUG_ON(apic->isr_count > MAX_APIC_VECTOR); + /* + * ISR (in service register) bit is set when injecting an interrupt. + * The highest vector is injected. Thus the latest bit set matches + * the highest bit in ISR. + */ + apic->highest_isr_cache = vec; + } } static inline int apic_find_highest_isr(struct kvm_lapic *apic) @@ -1627,11 +1648,16 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) int vector = kvm_apic_has_interrupt(vcpu); struct kvm_lapic *apic = vcpu->arch.apic; - /* Note that we never get here with APIC virtualization enabled. */ - if (vector == -1) return -1; + /* + * We get here even with APIC virtualization enabled, if doing + * nested virtualization and L1 runs with the "acknowledge interrupt + * on exit" mode. Then we cannot inject the interrupt via RVI, + * because the process would deliver it through the IDT. 
+ */ + apic_set_isr(vector, apic); apic_update_ppr(apic); apic_clear_irr(vector, apic); -- cgit v1.2.2 From c6e9d6f38894798696f23c8084ca7edbf16ee895 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Jul 2014 04:13:05 -0400 Subject: random: introduce getrandom(2) system call The getrandom(2) system call was requested by the LibreSSL Portable developers. It is analogous to the getentropy(2) system call in OpenBSD. The rationale of this system call is to provide resilience against file descriptor exhaustion attacks, where the attacker consumes all available file descriptors, forcing the use of the fallback code where /dev/[u]random is not available. Since the fallback code is often not well-tested, it is better to eliminate this potential failure mode entirely. The other feature provided by this new system call is the ability to request randomness from the /dev/urandom entropy pool, but to block until at least 128 bits of entropy has been accumulated in the /dev/urandom entropy pool. Historically, the emphasis in the /dev/urandom development has been to ensure that urandom pool is initialized as quickly as possible after system boot, and preferably before the init scripts start execution. This is because changing /dev/urandom reads to block represents an interface change that could potentially break userspace which is not acceptable. In practice, on most x86 desktop and server systems, in general the entropy pool can be initialized before it is needed (and in modern kernels, we will printk a warning message if not). However, on an embedded system, this may not be the case. And so with this new interface, we can provide the functionality of blocking until the urandom pool has been initialized. Any userspace program which uses this new functionality must take care to assure that, if it is used during the boot process, it will not cause the init scripts or other portions of the system startup to hang indefinitely. SYNOPSIS #include int getrandom(void *buf, size_t buflen, unsigned int flags); DESCRIPTION The system call getrandom() fills the buffer pointed to by buf with up to buflen random bytes which can be used to seed user space random number generators (i.e., DRBG's) or for other cryptographic uses. It should not be used for Monte Carlo simulations or other programs/algorithms which are doing probabilistic sampling. If the GRND_RANDOM flags bit is set, then draw from the /dev/random pool instead of the /dev/urandom pool. The /dev/random pool is limited based on the entropy that can be obtained from environmental noise, so if there is insufficient entropy, the requested number of bytes may not be returned. If there is no entropy available at all, getrandom(2) will either block, or return an error with errno set to EAGAIN if the GRND_NONBLOCK bit is set in flags. If the GRND_RANDOM bit is not set, then the /dev/urandom pool will be used. Unlike using read(2) to fetch data from /dev/urandom, if the urandom pool has not been sufficiently initialized, getrandom(2) will block (or return -1 with the errno set to EAGAIN if the GRND_NONBLOCK bit is set in flags). The getentropy(2) system call in OpenBSD can be emulated using the following function: int getentropy(void *buf, size_t buflen) { int ret; if (buflen > 256) goto failure; ret = getrandom(buf, buflen, 0); if (ret < 0) return ret; if (ret == buflen) return 0; failure: errno = EIO; return -1; } RETURN VALUE On success, the number of bytes that was filled in the buf is returned.
This may not be all the bytes requested by the caller via buflen if insufficient entropy was present in the /dev/random pool, or if the system call was interrupted by a signal. On error, -1 is returned, and errno is set appropriately. ERRORS EINVAL An invalid flag was passed to getrandom(2) EFAULT buf is outside the accessible address space. EAGAIN The requested entropy was not available, and getentropy(2) would have blocked if the GRND_NONBLOCK flag was not set. EINTR While blocked waiting for entropy, the call was interrupted by a signal handler; see the description of how interrupted read(2) calls on "slow" devices are handled with and without the SA_RESTART flag in the signal(7) man page. NOTES For small requests (buflen <= 256) getrandom(2) will not return EINTR when reading from the urandom pool once the entropy pool has been initialized, and it will return all of the bytes that have been requested. This is the recommended way to use getrandom(2), and is designed for compatibility with OpenBSD's getentropy() system call. However, if you are using GRND_RANDOM, then getrandom(2) may block until the entropy accounting determines that sufficient environmental noise has been gathered such that getrandom(2) will be operating as a NRBG instead of a DRBG for those people who are working in the NIST SP 800-90 regime. Since it may block for a long time, these guarantees do *not* apply. The user may want to interrupt a hanging process using a signal, so blocking until all of the requested bytes are returned would be unfriendly. For this reason, the user of getrandom(2) MUST always check the return value, in case it returns some error, or if fewer bytes than requested was returned. In the case of !GRND_RANDOM and small request, the latter should never happen, but the careful userspace code (and all crypto code should be careful) should check for this anyway! Finally, unless you are doing long-term key generation (and perhaps not even then), you probably shouldn't be using GRND_RANDOM. The cryptographic algorithms used for /dev/urandom are quite conservative, and so should be sufficient for all purposes. The disadvantage of GRND_RANDOM is that it can block, and the increased complexity required to deal with partially fulfilled getrandom(2) requests. 
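For illustration, a userspace caller that wants to seed a DRBG from the urandom pool might wrap the raw system call as sketched below. This sketch is not part of the patch: it assumes the x86-64 syscall number 318 added by the table change that follows (glibc provides no wrapper at this point, so raw syscall(2) is used), and it simply applies the rule above that the return value must always be checked and short reads retried.

/* Hypothetical userspace helper, not from the patch: fill a seed buffer
 * from the urandom pool (flags == 0), restarting on EINTR and on short
 * reads as required by the NOTES section above. */
#include <errno.h>
#include <stddef.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_getrandom
#define __NR_getrandom 318	/* x86-64 number added by this patch */
#endif

static int fill_seed(void *buf, size_t buflen)
{
	char *p = buf;
	long ret;

	while (buflen > 0) {
		ret = syscall(__NR_getrandom, p, buflen, 0);
		if (ret < 0) {
			if (errno == EINTR)
				continue;	/* blocking call interrupted by a signal */
			return -1;		/* e.g. EFAULT or EINVAL */
		}
		p += ret;
		buflen -= ret;
	}
	return 0;
}

With flags == 0 and a small buffer the loop normally runs once; the retry only matters for the blocking cases described above.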
Signed-off-by: Theodore Ts'o Reviewed-by: Zach Brown --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..5b46a618aeb1 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351 i386 sched_setattr sys_sched_setattr 352 i386 sched_getattr sys_sched_getattr 353 i386 renameat2 sys_renameat2 +355 i386 getrandom sys_getrandom diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..0dc4bf891460 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314 common sched_setattr sys_sched_setattr 315 common sched_getattr sys_sched_getattr 316 common renameat2 sys_renameat2 +318 common getrandom sys_getrandom # # x32-specific system call numbers start at 512 to avoid cache impact -- cgit v1.2.2 From 7b2a583afb4ab894f78bc0f8bd136e96b6499a7e Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 11 Jul 2014 08:45:25 +0100 Subject: x86/efi: Enforce CONFIG_RELOCATABLE for EFI boot stub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without CONFIG_RELOCATABLE the early boot code will decompress the kernel to LOAD_PHYSICAL_ADDR. While this may have been fine in the BIOS days, that isn't going to fly with UEFI since parts of the firmware code/data may be located at LOAD_PHYSICAL_ADDR. Straying outside of the bounds of the regions we've explicitly requested from the firmware will cause all sorts of trouble. Bruno reports that his machine resets while trying to decompress the kernel image. We already go to great pains to ensure the kernel is loaded into a suitably aligned buffer, it's just that the address isn't necessarily LOAD_PHYSICAL_ADDR, because we can't guarantee that address isn't in-use by the firmware. Explicitly enforce CONFIG_RELOCATABLE for the EFI boot stub, so that we can load the kernel at any address with the correct alignment. Reported-by: Bruno Prémont Tested-by: Bruno Prémont Cc: H. Peter Anvin Cc: Signed-off-by: Matt Fleming --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fcefdda5136d..a24097768463 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1536,6 +1536,7 @@ config EFI config EFI_STUB bool "EFI stub support" depends on EFI + select RELOCATABLE ---help--- This kernel feature allows a bzImage to be loaded directly by EFI firmware without the use of a bootloader. -- cgit v1.2.2 From ed5c41d30ef2ce578fd6b6e2f7ec23f2a58b1eba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Aug 2014 22:57:19 +0200 Subject: x86: MCE: Add raw_lock conversion again Commit ea431643d6c3 ("x86/mce: Fix CMCI preemption bugs") breaks RT by the completely unrelated conversion of the cmci_discover_lock to a regular (non raw) spinlock. This lock was annotated in commit 59d958d2c7de ("locking, x86: mce: Annotate cmci_discover_lock as raw") with a proper explanation why. The argument for converting the lock back to a regular spinlock was: - it does percpu ops without disabling preemption. Preemption is not disabled due to the mistaken use of a raw spinlock. Which is complete nonsense. The raw_spinlock is disabling preemption in the same way as a regular spinlock. In mainline spinlock maps to raw_spinlock, in RT spinlock becomes a "sleeping" lock. 
raw_spinlock has on RT exactly the same semantics as in mainline. And because this lock is taken in non preemptible context it must be raw on RT. Undo the locking brainfart. Reported-by: Clark Williams Reported-by: Steven Rostedt Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/mcheck/mce_intel.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 9a316b21df8b..3bdb95ae8c43 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -42,7 +42,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); * cmci_discover_lock protects against parallel discovery attempts * which could race against each other. */ -static DEFINE_SPINLOCK(cmci_discover_lock); +static DEFINE_RAW_SPINLOCK(cmci_discover_lock); #define CMCI_THRESHOLD 1 #define CMCI_POLL_INTERVAL (30 * HZ) @@ -144,14 +144,14 @@ static void cmci_storm_disable_banks(void) int bank; u64 val; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); owned = __get_cpu_var(mce_banks_owned); for_each_set_bit(bank, owned, MAX_NR_BANKS) { rdmsrl(MSR_IA32_MCx_CTL2(bank), val); val &= ~MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(bank), val); } - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } static bool cmci_storm_detect(void) @@ -211,7 +211,7 @@ static void cmci_discover(int banks) int i; int bios_wrong_thresh = 0; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { u64 val; int bios_zero_thresh = 0; @@ -266,7 +266,7 @@ static void cmci_discover(int banks) WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); } } - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) { pr_info_once( "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); @@ -316,10 +316,10 @@ void cmci_clear(void) if (!cmci_supported(&banks)) return; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) __cmci_disable_bank(i); - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } static void cmci_rediscover_work_func(void *arg) @@ -360,9 +360,9 @@ void cmci_disable_bank(int bank) if (!cmci_supported(&banks)) return; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); __cmci_disable_bank(bank); - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } static void intel_init_cmci(void) -- cgit v1.2.2 From 307627eebbb0bc41b21e74d78b932362a6c1b38d Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 6 Oct 2013 21:57:10 +0200 Subject: um: Use get_signal() signal_setup_done() Use the more generic functions get_signal() signal_setup_done() for signal delivery. 
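The conversion below hands the frame-setup helpers a single struct ksignal instead of the separate sig, k_sigaction and siginfo_t arguments. For reference, the definition lives in the generic signal code outside arch/x86, so it does not appear in this log; it is roughly:

/* Sketch of the generic helper type (see include/linux/signal.h) */
struct ksignal {
	struct k_sigaction ka;	/* handler, sa_flags, sa_restorer */
	siginfo_t info;		/* siginfo to copy into the signal frame */
	int sig;		/* signal number being delivered */
};

so the ksig->sig, ksig->ka and ksig->info accesses in the diff replace the old sig, *ka and *info parameters.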
Signed-off-by: Richard Weinberger --- arch/x86/um/signal.c | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 5e04a1c899fa..79d824551c1a 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -370,13 +370,12 @@ struct rt_sigframe char retcode[8]; }; -int setup_signal_stack_sc(unsigned long stack_top, int sig, - struct k_sigaction *ka, struct pt_regs *regs, - sigset_t *mask) +int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig, + struct pt_regs *regs, sigset_t *mask) { struct sigframe __user *frame; void __user *restorer; - int err = 0; + int err = 0, sig = ksig->sig; /* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */ stack_top = ((stack_top + 4) & -16UL) - 4; @@ -385,8 +384,8 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, return 1; restorer = frame->retcode; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; + if (ksig->ka.sa.sa_flags & SA_RESTORER) + restorer = ksig->ka.sa.sa_restorer; err |= __put_user(restorer, &frame->pretcode); err |= __put_user(sig, &frame->sig); @@ -410,20 +409,19 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, return err; PT_REGS_SP(regs) = (unsigned long) frame; - PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; + PT_REGS_IP(regs) = (unsigned long) ksig->ka.sa.sa_handler; PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) 0; PT_REGS_CX(regs) = (unsigned long) 0; return 0; } -int setup_signal_stack_si(unsigned long stack_top, int sig, - struct k_sigaction *ka, struct pt_regs *regs, - siginfo_t *info, sigset_t *mask) +int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, + struct pt_regs *regs, sigset_t *mask) { struct rt_sigframe __user *frame; void __user *restorer; - int err = 0; + int err = 0, sig = ksig->sig; stack_top &= -8UL; frame = (struct rt_sigframe __user *) stack_top - 1; @@ -431,14 +429,14 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, return 1; restorer = frame->retcode; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; + if (ksig->ka.sa.sa_flags & SA_RESTORER) + restorer = ksig->ka.sa.sa_restorer; err |= __put_user(restorer, &frame->pretcode); err |= __put_user(sig, &frame->sig); err |= __put_user(&frame->info, &frame->pinfo); err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); + err |= copy_siginfo_to_user(&frame->info, &ksig->info); err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask, PT_REGS_SP(regs)); @@ -457,7 +455,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, return err; PT_REGS_SP(regs) = (unsigned long) frame; - PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; + PT_REGS_IP(regs) = (unsigned long) ksig->ka.sa.sa_handler; PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) &frame->info; PT_REGS_CX(regs) = (unsigned long) &frame->uc; @@ -502,12 +500,11 @@ struct rt_sigframe struct _fpstate fpstate; }; -int setup_signal_stack_si(unsigned long stack_top, int sig, - struct k_sigaction *ka, struct pt_regs * regs, - siginfo_t *info, sigset_t *set) +int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, + struct pt_regs *regs, sigset_t *set) { struct rt_sigframe __user *frame; - int err = 0; + int err = 0, sig = ksig->sig; frame = (struct rt_sigframe __user *) round_down(stack_top - sizeof(struct rt_sigframe), 16); @@ -517,8 
+514,8 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto out; - if (ka->sa.sa_flags & SA_SIGINFO) { - err |= copy_siginfo_to_user(&frame->info, info); + if (ksig->ka.sa.sa_flags & SA_SIGINFO) { + err |= copy_siginfo_to_user(&frame->info, &ksig->info); if (err) goto out; } @@ -543,8 +540,8 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, * already in userspace. */ /* x86-64 should always use SA_RESTORER. */ - if (ka->sa.sa_flags & SA_RESTORER) - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + if (ksig->ka.sa.sa_flags & SA_RESTORER) + err |= __put_user(ksig->ka.sa.sa_restorer, &frame->pretcode); else /* could use a vstub here */ return err; @@ -570,7 +567,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, */ PT_REGS_SI(regs) = (unsigned long) &frame->info; PT_REGS_DX(regs) = (unsigned long) &frame->uc; - PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; + PT_REGS_IP(regs) = (unsigned long) ksig->ka.sa.sa_handler; out: return err; } -- cgit v1.2.2 From 9a95f3cf7b33d66fa64727cff8cd2f2a9d09f335 Mon Sep 17 00:00:00 2001 From: Paul Cassella Date: Wed, 6 Aug 2014 16:07:24 -0700 Subject: mm: describe mmap_sem rules for __lock_page_or_retry() and callers Add a comment describing the circumstances in which __lock_page_or_retry() will or will not release the mmap_sem when returning 0. Add comments to lock_page_or_retry()'s callers (filemap_fault(), do_swap_page()) noting the impact on VM_FAULT_RETRY returns. Add comments on up the call tree, particularly replacing the false "We return with mmap_sem still held" comments. Signed-off-by: Paul Cassella Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1dbade870f90..a24194681513 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1218,7 +1218,8 @@ good_area: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo - * the fault: + * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if + * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. */ fault = handle_mm_fault(mm, vma, address, flags); -- cgit v1.2.2 From 9bfc41138525c51955cd35cbca56f8cd757a73f8 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 6 Aug 2014 16:07:38 -0700 Subject: memory-hotplug: x86_64: suitable memory should go to ZONE_MOVABLE This patch introduces zone_for_memory() to arch_add_memory() on x86_64 to ensure new, higher memory added into ZONE_MOVABLE if movable zone has already setup. Signed-off-by: Wang Nan Cc: Zhang Yanfei Cc: Dave Hansen Cc: Ingo Molnar Cc: Yinghai Lu Cc: "Mel Gorman" Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Cc: "Luck, Tony" Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index df1a9927ad29..5621c47d7a1a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -691,7 +691,8 @@ static void update_end_of_memory_vars(u64 start, u64 size) int arch_add_memory(int nid, u64 start, u64 size) { struct pglist_data *pgdat = NODE_DATA(nid); - struct zone *zone = pgdat->node_zones + ZONE_NORMAL; + struct zone *zone = pgdat->node_zones + + zone_for_memory(nid, start, size, ZONE_NORMAL); unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; -- cgit v1.2.2 From 03d4be64603e2dc1bfa624bc436869d73340723e Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 6 Aug 2014 16:07:40 -0700 Subject: memory-hotplug: x86_32: suitable memory should go to ZONE_MOVABLE This patch introduces zone_for_memory() to arch_add_memory() on x86_32 to ensure new, higher memory added into ZONE_MOVABLE if movable zone has already setup. Signed-off-by: Wang Nan Cc: Zhang Yanfei Cc: Dave Hansen Cc: Ingo Molnar Cc: Yinghai Lu Cc: "Mel Gorman" Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: "Luck, Tony" Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index e39504878aec..7d05565ba781 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -825,7 +825,8 @@ void __init mem_init(void) int arch_add_memory(int nid, u64 start, u64 size) { struct pglist_data *pgdata = NODE_DATA(nid); - struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; + struct zone *zone = pgdata->node_zones + + zone_for_memory(nid, start, size, ZONE_HIGHMEM); unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; -- cgit v1.2.2 From 3eec595235c17a74094daa1e02d1b0af2e9a7125 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 8 Aug 2014 14:07:51 +0800 Subject: x86, irq, PCI: Keep IRQ assignment for PCI devices during suspend/hibernation Now IOAPIC driver dynamically allocates IRQ numbers for IOAPIC pins. We need to keep IRQ assignment for PCI devices during suspend/hibernation, otherwise it may cause failure of suspend/hibernation due to: 1) Device driver calls pci_enable_device() to allocate an IRQ number and register interrupt handler on the returned IRQ. 2) Device driver's suspend callback calls pci_disable_device() and release assigned IRQ in turn. 3) Device driver's resume callback calls pci_enable_device() to allocate IRQ number again. A different IRQ number may be assigned by IOAPIC driver this time. 4) Now the hardware delivers interrupt to the new IRQ but interrupt handler is still registered against the old IRQ, so it breaks suspend/hibernation. To fix this issue, we keep IRQ assignment during suspend/hibernation. Flag pci_dev.dev.power.is_prepared is used to detect that pci_disable_device() is called during suspend/hibernation. Reported-and-Tested-by: Borislav Petkov Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Rafael J. 
Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Grant Likely Cc: Len Brown Link: http://lkml.kernel.org/r/1407478071-29399-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/intel_mid_pci.c | 2 +- arch/x86/pci/irq.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 09fece368592..3865116c51fb 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -229,7 +229,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) static void intel_mid_pci_irq_disable(struct pci_dev *dev) { - if (dev->irq > 0) + if (!dev->dev.power.is_prepared && dev->irq > 0) mp_unmap_irq(dev->irq); } diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 748cfe8ab322..bc1a2c341891 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1256,7 +1256,8 @@ static int pirq_enable_irq(struct pci_dev *dev) static void pirq_disable_irq(struct pci_dev *dev) { - if (io_apic_assign_pci_irqs && dev->irq) { + if (io_apic_assign_pci_irqs && !dev->dev.power.is_prepared && + dev->irq) { mp_unmap_irq(dev->irq); dev->irq = 0; } -- cgit v1.2.2 From 308c09f17da4adc53935115dbeb5bce4f067d8f9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 8 Aug 2014 14:23:25 -0700 Subject: lib/scatterlist: make ARCH_HAS_SG_CHAIN an actual Kconfig Rather than have architectures #define ARCH_HAS_SG_CHAIN in an architecture specific scatterlist.h, make it a proper Kconfig option and use that instead. At same time, remove the header files are are now mostly useless and just include asm-generic/scatterlist.h. [sfr@canb.auug.org.au: powerpc files now need asm/dma.h] Signed-off-by: Laura Abbott Acked-by: Thomas Gleixner [x86] Acked-by: Benjamin Herrenschmidt [powerpc] Acked-by: Heiko Carstens Cc: Russell King Cc: Tony Luck Cc: Fenghua Yu Cc: Paul Mackerras Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: "James E.J. 
Bottomley" Cc: Martin Schwidefsky Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/include/asm/Kbuild | 3 ++- arch/x86/include/asm/scatterlist.h | 8 -------- 3 files changed, 3 insertions(+), 9 deletions(-) delete mode 100644 arch/x86/include/asm/scatterlist.h (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bf2405053af5..c915cc6e40be 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -96,6 +96,7 @@ config X86 select IRQ_FORCED_THREADING select HAVE_BPF_JIT if X86_64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select ARCH_HAS_SG_CHAIN select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 3ca9762e1649..3bf000fab0ae 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,6 +5,7 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h -generic-y += early_ioremap.h generic-y += cputime.h +generic-y += early_ioremap.h generic-y += mcs_spinlock.h +generic-y += scatterlist.h diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h deleted file mode 100644 index 4240878b9d76..000000000000 --- a/arch/x86/include/asm/scatterlist.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_X86_SCATTERLIST_H -#define _ASM_X86_SCATTERLIST_H - -#include - -#define ARCH_HAS_SG_CHAIN - -#endif /* _ASM_X86_SCATTERLIST_H */ -- cgit v1.2.2 From a6c19dfe39941a5d3f4d072121c0a4841e7e26fd Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 8 Aug 2014 14:23:40 -0700 Subject: arm64,ia64,ppc,s390,sh,tile,um,x86,mm: remove default gate area The core mm code will provide a default gate area based on FIXADDR_USER_START and FIXADDR_USER_END if !defined(__HAVE_ARCH_GATE_AREA) && defined(AT_SYSINFO_EHDR). This default is only useful for ia64. arm64, ppc, s390, sh, tile, 64-bit UML, and x86_32 have their own code just to disable it. arm, 32-bit UML, and x86_64 have gate areas, but they have their own implementations. This gets rid of the default and moves the code into ia64. This should save some code on architectures without a gate area: it's now possible to inline the gate_area functions in the default case. Signed-off-by: Andy Lutomirski Acked-by: Nathan Lynch Acked-by: H. Peter Anvin Acked-by: Benjamin Herrenschmidt [in principle] Acked-by: Richard Weinberger [for um] Acked-by: Will Deacon [for arm64] Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Chris Metcalf Cc: Jeff Dike Cc: Richard Weinberger Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Nathan Lynch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page.h | 1 - arch/x86/include/asm/page_64.h | 2 ++ arch/x86/um/asm/elf.h | 1 - arch/x86/um/mem_64.c | 15 --------------- arch/x86/vdso/vdso32-setup.c | 19 +------------------ 5 files changed, 3 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 775873d3be55..802dde30c928 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -70,7 +70,6 @@ extern bool __virt_addr_valid(unsigned long kaddr); #include #include -#define __HAVE_ARCH_GATE_AREA 1 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 0f1ddee6a0ce..f408caf73430 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -39,4 +39,6 @@ void copy_page(void *to, void *from); #endif /* !__ASSEMBLY__ */ +#define __HAVE_ARCH_GATE_AREA 1 + #endif /* _ASM_X86_PAGE_64_H */ diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 0feee2fd5077..25a1022dd793 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -216,6 +216,5 @@ extern long elf_aux_hwcap; #define ELF_HWCAP (elf_aux_hwcap) #define SET_PERSONALITY(ex) do ; while(0) -#define __HAVE_ARCH_GATE_AREA 1 #endif diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c index c6492e75797b..f8fecaddcc0d 100644 --- a/arch/x86/um/mem_64.c +++ b/arch/x86/um/mem_64.c @@ -9,18 +9,3 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index e4f7781ee162..e904c270573b 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -115,23 +115,6 @@ static __init int ia32_binfmt_init(void) return 0; } __initcall(ia32_binfmt_init); -#endif - -#else /* CONFIG_X86_32 */ - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} +#endif /* CONFIG_SYSCTL */ #endif /* CONFIG_X86_64 */ -- cgit v1.2.2 From 164109e3cdba52b9f2ece063bc3aa2a90f77c273 Mon Sep 17 00:00:00 2001 From: Daniel Walter Date: Fri, 8 Aug 2014 14:24:03 -0700 Subject: arch/x86: replace strict_strto calls Replace obsolete strict_strto calls with appropriate kstrto calls Signed-off-by: Daniel Walter Acked-by: Borislav Petkov Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/intel_cacheinfo.c | 4 ++-- arch/x86/kernel/cpu/mcheck/mce.c | 6 +++--- arch/x86/kernel/cpu/mcheck/mce_amd.c | 4 ++-- arch/x86/kvm/mmu_audit.c | 2 +- arch/x86/platform/uv/tlb_uv.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 9c8f7394c612..c7035073dfc1 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -461,7 +461,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); - if (strict_strtoul(buf, 10, &val) < 0) + if (kstrtoul(buf, 10, &val) < 0) return -EINVAL; err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); @@ -511,7 +511,7 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) return -EINVAL; - if (strict_strtoul(buf, 16, &val) < 0) + if (kstrtoul(buf, 16, &val) < 0) return -EINVAL; if (amd_set_subcaches(cpu, val)) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4fc57975acc1..bd9ccda8087f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2136,7 +2136,7 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, { u64 new; - if (strict_strtoull(buf, 0, &new) < 0) + if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; attr_to_bank(attr)->ctl = new; @@ -2174,7 +2174,7 @@ static ssize_t set_ignore_ce(struct device *s, { u64 new; - if (strict_strtoull(buf, 0, &new) < 0) + if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; if (mca_cfg.ignore_ce ^ !!new) { @@ -2198,7 +2198,7 @@ static ssize_t set_cmci_disabled(struct device *s, { u64 new; - if (strict_strtoull(buf, 0, &new) < 0) + if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; if (mca_cfg.cmci_disabled ^ !!new) { diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 603df4f74640..1e49f8f41276 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -353,7 +353,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) if (!b->interrupt_capable) return -EINVAL; - if (strict_strtoul(buf, 0, &new) < 0) + if (kstrtoul(buf, 0, &new) < 0) return -EINVAL; b->interrupt_enable = !!new; @@ -372,7 +372,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) struct thresh_restart tr; unsigned long new; - if (strict_strtoul(buf, 0, &new) < 0) + if (kstrtoul(buf, 0, &new) < 0) return -EINVAL; if (new > THRESHOLD_MAX) diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 1185fe7a7f47..9ade5cfb5a4c 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -273,7 +273,7 @@ static int mmu_audit_set(const char *val, const struct kernel_param *kp) int ret; unsigned long enable; - ret = strict_strtoul(val, 10, &enable); + ret = kstrtoul(val, 10, &enable); if (ret < 0) return -EINVAL; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index ed161c6e278b..3968d67d366b 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1479,7 +1479,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user, return count; } - if (strict_strtol(optstr, 10, &input_arg) < 0) { + if 
(kstrtol(optstr, 10, &input_arg) < 0) { printk(KERN_DEBUG "%s is invalid\n", optstr); return -EINVAL; } -- cgit v1.2.2 From 9183df25fe7b194563db3fec6dc3202a5855839c Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 8 Aug 2014 14:25:29 -0700 Subject: shm: add memfd_create() syscall memfd_create() is similar to mmap(MAP_ANON), but returns a file-descriptor that you can pass to mmap(). It can support sealing and avoids any connection to user-visible mount-points. Thus, it's not subject to quotas on mounted file-systems, but can be used like malloc()'ed memory, with a file-descriptor to it. memfd_create() returns the raw shmem file, so calls like ftruncate() can be used to modify the underlying inode. Also calls like fstat() will return proper information and mark the file as a regular file. If you want sealing, you can specify MFD_ALLOW_SEALING. Otherwise, sealing is not supported (like on all other regular files). Compared to O_TMPFILE, it does not require a tmpfs mount-point and is not subject to a filesystem size limit. It is still properly accounted to memcg limits, though, and to the same overcommit or no-overcommit accounting as all user memory. Signed-off-by: David Herrmann Acked-by: Hugh Dickins Cc: Michael Kerrisk Cc: Ryan Lortie Cc: Lennart Poettering Cc: Daniel Mack Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d1b4a119d4a5..028b78168d85 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -362,3 +362,4 @@ 353 i386 renameat2 sys_renameat2 354 i386 seccomp sys_seccomp 355 i386 getrandom sys_getrandom +356 i386 memfd_create sys_memfd_create diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 252c804bb1aa..ca2b9aa78c81 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -325,6 +325,7 @@ 316 common renameat2 sys_renameat2 317 common seccomp sys_seccomp 318 common getrandom sys_getrandom +319 common memfd_create sys_memfd_create # # x32-specific system call numbers start at 512 to avoid cache impact -- cgit v1.2.2
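For illustration only (not part of the patch): a minimal user of the new syscall as described in the commit message above. There is no libc wrapper at this point, so the raw x86_64 syscall number from the syscall_64.tbl hunk above is used, and the MFD_*/sealing constants are spelled out (mirroring the uapi values added by this series) in case installed headers do not yet provide them.

/* memfd_demo.c - hedged sketch of memfd_create() plus sealing usage */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_memfd_create
#define __NR_memfd_create 319		/* x86_64 number added above */
#endif
#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC       0x0001U	/* mirrors the new uapi header */
#define MFD_ALLOW_SEALING 0x0002U
#endif
#ifndef F_ADD_SEALS
#define F_ADD_SEALS       1033		/* sealing fcntl from this series */
#define F_SEAL_SHRINK     0x0002
#define F_SEAL_GROW       0x0004
#endif

int main(void)
{
	int fd = syscall(__NR_memfd_create, "demo", MFD_CLOEXEC | MFD_ALLOW_SEALING);
	if (fd < 0)
		return 1;

	/* The fd refers to a raw shmem file: size it with ftruncate()... */
	if (ftruncate(fd, 4096) < 0)
		return 1;

	/* ...map and use it like anonymous memory... */
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	strcpy(p, "hello memfd");

	/* ...and, since MFD_ALLOW_SEALING was given, pin its size. */
	fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW);

	printf("%s\n", p);
	return 0;
}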
From de5b56ba51f63973ceb5c184ee0855f0c8a13fc9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:41 -0700 Subject: kernel: build bin2c based on config option CONFIG_BUILD_BIN2C Currently bin2c is built only if CONFIG_IKCONFIG=y. But bin2c will now be used by kexec too. So make its compilation dependent on CONFIG_BUILD_BIN2C; this config option can be selected by CONFIG_KEXEC and CONFIG_IKCONFIG. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c915cc6e40be..98fe3df6df82 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1582,6 +1582,7 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call" + select BUILD_BIN2C ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot -- cgit v1.2.2 From f0895685c7fd8c938c91a9d8a6f7c11f22df58d2 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:55 -0700 Subject: kexec: new syscall kexec_file_load() declaration This is the new syscall kexec_file_load() declaration/interface. I have reserved the syscall number only for x86_64 so far. Other architectures (including i386) can reserve a syscall number when they enable support for this new syscall. Signed-off-by: Vivek Goyal Cc: Michael Kerrisk Cc: Borislav Petkov Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/syscalls/syscall_64.tbl | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ca2b9aa78c81..35dd922727b9 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -326,6 +326,7 @@ 317 common seccomp sys_seccomp 318 common getrandom sys_getrandom 319 common memfd_create sys_memfd_create +320 common kexec_file_load sys_kexec_file_load # # x32-specific system call numbers start at 512 to avoid cache impact -- cgit v1.2.2
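Again for illustration only (not part of either patch): a sketch of how a userspace loader might drive the syscall once the declaration above and the implementation below are in place. It assumes the x86_64 syscall number 320 added above and the argument order described by the implementation patch (kernel fd, initrd fd, command-line length including the trailing NUL, command line, flags); the /boot paths are hypothetical.

/* kexec_file_demo.c - hedged sketch of loading a kernel via kexec_file_load() */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_kexec_file_load
#define __NR_kexec_file_load 320	/* x86_64 number added above */
#endif

int main(void)
{
	const char *cmdline = "root=/dev/sda1 console=ttyS0";
	int kernel_fd = open("/boot/vmlinuz", O_RDONLY);	/* hypothetical path */
	int initrd_fd = open("/boot/initrd.img", O_RDONLY);	/* hypothetical path */

	if (kernel_fd < 0 || initrd_fd < 0) {
		perror("open");
		return 1;
	}

	/* flags == 0 requests a normal load; the series also defines flags
	 * for unloading, crash (kdump) loads and loading without an initramfs. */
	if (syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
		    strlen(cmdline) + 1, cmdline, 0UL) < 0) {
		perror("kexec_file_load");
		return 1;
	}

	/* The loaded kernel is actually started later, e.g. by
	 * reboot(LINUX_REBOOT_CMD_KEXEC). */
	return 0;
}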
From cb1052581e2bddd6096544f3f944f4e7fdad4c7f Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:57 -0700 Subject: kexec: implementation of new syscall kexec_file_load The previous patch provided the interface definition and this patch provides the implementation of the new syscall. Previously the segment list was prepared in user space. Now user space just passes a kernel fd, an initrd fd and the command line, and the kernel creates the segment list internally. This patch contains the generic part of the code. Actual segment preparation and loading is done by the arch- and image-specific loader, which comes in the next patch. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_64.c | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 679cef0791cd..c8875b5545e1 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -22,6 +22,10 @@ #include #include +static struct kexec_file_ops *kexec_file_loaders[] = { + NULL, +}; + static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.pud); @@ -283,3 +287,44 @@ void arch_crash_save_vmcoreinfo(void) (unsigned long)&_text - __START_KERNEL); } +/* arch-dependent functionality related to kexec file-based syscall */ + +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int i, ret = -ENOEXEC; + struct kexec_file_ops *fops; + + for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { + fops = kexec_file_loaders[i]; + if (!fops || !fops->probe) + continue; + + ret = fops->probe(buf, buf_len); + if (!ret) { + image->fops = fops; + return ret; + } + } + + return ret; +} + +void *arch_kexec_kernel_image_load(struct kimage *image) +{ + if (!image->fops || !image->fops->load) + return ERR_PTR(-ENOEXEC); + + return image->fops->load(image, image->kernel_buf, + image->kernel_buf_len, image->initrd_buf, + image->initrd_buf_len, image->cmdline_buf, + image->cmdline_buf_len); +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + if (!image->fops || !image->fops->cleanup) + return 0; + + return image->fops->cleanup(image); +} -- cgit v1.2.2 From daeba0641a707626f3674db71016f333edf64395 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:59 -0700 Subject: purgatory/sha256: provide implementation of sha256 in purgatory context The next two patches provide code for purgatory. This is code which does not link against the kernel and runs stand-alone; it runs between two kernels. One of the primary purposes of this code is to verify the digest of the newly loaded kernel and make sure it matches the digest computed at kernel load time. We use sha256 for calculating the digest of kexec segments. Purgatory can't use the standard crypto API as that API is not available in the purgatory context. Hence, I have copied code from crypto/sha256_generic.c and compiled it with the purgatory code so that it could be used. I could not #include the sha256_generic.c file here as some of the function signatures required a little tweaking. The original functions work with the crypto API but these ones don't. So instead of doing #include on sha256_generic.c I just copied the relevant portions of code into arch/x86/purgatory/sha256.c. Now we shouldn't have to touch this code at all. Do let me know if there are better ways to handle it. This patch does not enable compiling of this code. That happens in the next patch. I wanted to highlight this change in a separate patch for easy review. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H.
Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/purgatory/sha256.c | 283 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/purgatory/sha256.h | 22 ++++ 2 files changed, 305 insertions(+) create mode 100644 arch/x86/purgatory/sha256.c create mode 100644 arch/x86/purgatory/sha256.h (limited to 'arch/x86') diff --git a/arch/x86/purgatory/sha256.c b/arch/x86/purgatory/sha256.c new file mode 100644 index 000000000000..548ca675a14a --- /dev/null +++ b/arch/x86/purgatory/sha256.c @@ -0,0 +1,283 @@ +/* + * SHA-256, as specified in + * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf + * + * SHA-256 code by Jean-Luc Cooke . + * + * Copyright (c) Jean-Luc Cooke + * Copyright (c) Andrew McDonald + * Copyright (c) 2002 James Morris + * Copyright (c) 2014 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include +#include +#include "sha256.h" +#include "../boot/string.h" + +static inline u32 Ch(u32 x, u32 y, u32 z) +{ + return z ^ (x & (y ^ z)); +} + +static inline u32 Maj(u32 x, u32 y, u32 z) +{ + return (x & y) | (z & (x | y)); +} + +#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22)) +#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25)) +#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3)) +#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10)) + +static inline void LOAD_OP(int I, u32 *W, const u8 *input) +{ + W[I] = __be32_to_cpu(((__be32 *)(input))[I]); +} + +static inline void BLEND_OP(int I, u32 *W) +{ + W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16]; +} + +static void sha256_transform(u32 *state, const u8 *input) +{ + u32 a, b, c, d, e, f, g, h, t1, t2; + u32 W[64]; + int i; + + /* load the input */ + for (i = 0; i < 16; i++) + LOAD_OP(i, W, input); + + /* now blend */ + for (i = 16; i < 64; i++) + BLEND_OP(i, W); + + /* load the state into our registers */ + a = state[0]; b = state[1]; c = state[2]; d = state[3]; + e = state[4]; f = state[5]; g = state[6]; h = state[7]; + + /* now iterate */ + t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; + t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; + t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; + t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11]; + t2 = e0(f) + Maj(f, g, 
h); a += t1; e = t1 + t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; + t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + 
W[46]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + state[0] += a; state[1] += b; state[2] += c; state[3] += d; + state[4] += e; state[5] += f; state[6] += g; state[7] += h; + + /* clear any sensitive info... */ + a = b = c = d = e = f = g = h = t1 = t2 = 0; + memset(W, 0, 64 * sizeof(u32)); +} + +int sha256_init(struct sha256_state *sctx) +{ + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; + + return 0; +} + +int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len) +{ + unsigned int partial, done; + const u8 *src; + + partial = sctx->count & 0x3f; + sctx->count += len; + done = 0; + src = data; + + if ((partial + len) > 63) { + if (partial) { + done = -partial; + memcpy(sctx->buf + partial, data, done + 64); + src = sctx->buf; + } + + do { + sha256_transform(sctx->state, src); + done += 64; + src = data + done; + } while (done + 63 < len); + + partial = 0; + } + memcpy(sctx->buf + partial, src, len - done); + + return 0; +} + +int sha256_final(struct sha256_state *sctx, u8 *out) +{ + __be32 *dst = (__be32 *)out; + __be64 bits; + unsigned int index, pad_len; + int i; + static const u8 padding[64] = { 0x80, }; + + /* Save number of bits */ + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64. */ + index = sctx->count & 0x3f; + pad_len = (index < 56) ? 
(56 - index) : ((64+56) - index); + sha256_update(sctx, padding, pad_len); + + /* Append length (before padding) */ + sha256_update(sctx, (const u8 *)&bits, sizeof(bits)); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Zeroize sensitive information. */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h new file mode 100644 index 000000000000..bd15a4127735 --- /dev/null +++ b/arch/x86/purgatory/sha256.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2014 Red Hat Inc. + * + * Author: Vivek Goyal + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#ifndef SHA256_H +#define SHA256_H + + +#include +#include + +extern int sha256_init(struct sha256_state *sctx); +extern int sha256_update(struct sha256_state *sctx, const u8 *input, + unsigned int length); +extern int sha256_final(struct sha256_state *sctx, u8 *hash); + +#endif /* SHA256_H */ -- cgit v1.2.2 From 8fc5b4d4121c95482b2583a07863c6b0aba2d9e1 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:02 -0700 Subject: purgatory: core purgatory functionality Create a stand-alone relocatable object, purgatory, which runs between two kernels. The name, concept and some code have been taken from kexec-tools. The idea is that this code runs after a crash and in a minimal environment. So keep it separate from the rest of the kernel; in the long term we should have to do practically no maintenance of this code. This code also has the logic to verify sha256 hashes of the various segments which have been loaded into memory. So first we verify that the kernel we are jumping to is fine and has not been corrupted, and make progress only if the checksums are verified. This code also takes care of copying some memory contents to the backup region. [sfr@canb.auug.org.au: run host built programs from objtree] Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H.
Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kbuild | 4 ++ arch/x86/Makefile | 8 +++ arch/x86/purgatory/Makefile | 30 +++++++++++ arch/x86/purgatory/entry64.S | 101 ++++++++++++++++++++++++++++++++++++++ arch/x86/purgatory/purgatory.c | 72 +++++++++++++++++++++++++++ arch/x86/purgatory/setup-x86_64.S | 58 ++++++++++++++++++++++ arch/x86/purgatory/stack.S | 19 +++++++ arch/x86/purgatory/string.c | 13 +++++ 8 files changed, 305 insertions(+) create mode 100644 arch/x86/purgatory/Makefile create mode 100644 arch/x86/purgatory/entry64.S create mode 100644 arch/x86/purgatory/purgatory.c create mode 100644 arch/x86/purgatory/setup-x86_64.S create mode 100644 arch/x86/purgatory/stack.S create mode 100644 arch/x86/purgatory/string.c (limited to 'arch/x86') diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index e5287d8517aa..61b6d51866f8 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -16,3 +16,7 @@ obj-$(CONFIG_IA32_EMULATION) += ia32/ obj-y += platform/ obj-y += net/ + +ifeq ($(CONFIG_X86_64),y) +obj-$(CONFIG_KEXEC) += purgatory/ +endif diff --git a/arch/x86/Makefile b/arch/x86/Makefile index c65fd9650467..c1aa36887843 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -183,6 +183,14 @@ archscripts: scripts_basic archheaders: $(Q)$(MAKE) $(build)=arch/x86/syscalls all +archprepare: +ifeq ($(CONFIG_KEXEC),y) +# Build only for 64bit. No loaders for 32bit yet. + ifeq ($(CONFIG_X86_64),y) + $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c + endif +endif + ### # Kernel objects diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile new file mode 100644 index 000000000000..7fde9ee438a4 --- /dev/null +++ b/arch/x86/purgatory/Makefile @@ -0,0 +1,30 @@ +purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string.o + +targets += $(purgatory-y) +PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) + +LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib +targets += purgatory.ro + +# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That +# in turn leaves some undefined symbols like __fentry__ in purgatory and not +# sure how to relocate those. Like kexec-tools, use custom flags. + +KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large + +$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE + $(call if_changed,ld) + +targets += kexec-purgatory.c + +quiet_cmd_bin2c = BIN2C $@ + cmd_bin2c = cat $(obj)/purgatory.ro | $(objtree)/scripts/basic/bin2c kexec_purgatory > $(obj)/kexec-purgatory.c + +$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE + $(call if_changed,bin2c) + + +# No loaders for 32bits yet. +ifeq ($(CONFIG_X86_64),y) + obj-$(CONFIG_KEXEC) += kexec-purgatory.o +endif diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S new file mode 100644 index 000000000000..be3249d7ed2d --- /dev/null +++ b/arch/x86/purgatory/entry64.S @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) + * Copyright (C) 2014 Red Hat Inc. + + * Author(s): Vivek Goyal + * + * This code has been taken from kexec-tools. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + + .text + .balign 16 + .code64 + .globl entry64, entry64_regs + + +entry64: + /* Setup a gdt that should be preserved */ + lgdt gdt(%rip) + + /* load the data segments */ + movl $0x18, %eax /* data segment */ + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + movl %eax, %fs + movl %eax, %gs + + /* Setup new stack */ + leaq stack_init(%rip), %rsp + pushq $0x10 /* CS */ + leaq new_cs_exit(%rip), %rax + pushq %rax + lretq +new_cs_exit: + + /* Load the registers */ + movq rax(%rip), %rax + movq rbx(%rip), %rbx + movq rcx(%rip), %rcx + movq rdx(%rip), %rdx + movq rsi(%rip), %rsi + movq rdi(%rip), %rdi + movq rsp(%rip), %rsp + movq rbp(%rip), %rbp + movq r8(%rip), %r8 + movq r9(%rip), %r9 + movq r10(%rip), %r10 + movq r11(%rip), %r11 + movq r12(%rip), %r12 + movq r13(%rip), %r13 + movq r14(%rip), %r14 + movq r15(%rip), %r15 + + /* Jump to the new code... */ + jmpq *rip(%rip) + + .section ".rodata" + .balign 4 +entry64_regs: +rax: .quad 0x0 +rbx: .quad 0x0 +rcx: .quad 0x0 +rdx: .quad 0x0 +rsi: .quad 0x0 +rdi: .quad 0x0 +rsp: .quad 0x0 +rbp: .quad 0x0 +r8: .quad 0x0 +r9: .quad 0x0 +r10: .quad 0x0 +r11: .quad 0x0 +r12: .quad 0x0 +r13: .quad 0x0 +r14: .quad 0x0 +r15: .quad 0x0 +rip: .quad 0x0 + .size entry64_regs, . - entry64_regs + + /* GDT */ + .section ".rodata" + .balign 16 +gdt: + /* 0x00 unusable segment + * 0x08 unused + * so use them as gdt ptr + */ + .word gdt_end - gdt - 1 + .quad gdt + .word 0, 0, 0 + + /* 0x10 4GB flat code segment */ + .word 0xFFFF, 0x0000, 0x9A00, 0x00AF + + /* 0x18 4GB flat data segment */ + .word 0xFFFF, 0x0000, 0x9200, 0x00CF +gdt_end: +stack: .quad 0, 0 +stack_init: diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c new file mode 100644 index 000000000000..25e068ba3382 --- /dev/null +++ b/arch/x86/purgatory/purgatory.c @@ -0,0 +1,72 @@ +/* + * purgatory: Runs between two kernels + * + * Copyright (C) 2014 Red Hat Inc. + * + * Author: + * Vivek Goyal + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include "sha256.h" +#include "../boot/string.h" + +struct sha_region { + unsigned long start; + unsigned long len; +}; + +unsigned long backup_dest = 0; +unsigned long backup_src = 0; +unsigned long backup_sz = 0; + +u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; + +struct sha_region sha_regions[16] = {}; + +/* + * On x86, second kernel requries first 640K of memory to boot. Copy + * first 640K to a backup region in reserved memory range so that second + * kernel can use first 640K. 
+ */ +static int copy_backup_region(void) +{ + if (backup_dest) + memcpy((void *)backup_dest, (void *)backup_src, backup_sz); + + return 0; +} + +int verify_sha256_digest(void) +{ + struct sha_region *ptr, *end; + u8 digest[SHA256_DIGEST_SIZE]; + struct sha256_state sctx; + + sha256_init(&sctx); + end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])]; + for (ptr = sha_regions; ptr < end; ptr++) + sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len); + + sha256_final(&sctx, digest); + + if (memcmp(digest, sha256_digest, sizeof(digest))) + return 1; + + return 0; +} + +void purgatory(void) +{ + int ret; + + ret = verify_sha256_digest(); + if (ret) { + /* loop forever */ + for (;;) + ; + } + copy_backup_region(); +} diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S new file mode 100644 index 000000000000..fe3c91ba1bd0 --- /dev/null +++ b/arch/x86/purgatory/setup-x86_64.S @@ -0,0 +1,58 @@ +/* + * purgatory: setup code + * + * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) + * Copyright (C) 2014 Red Hat Inc. + * + * This code has been taken from kexec-tools. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + .text + .globl purgatory_start + .balign 16 +purgatory_start: + .code64 + + /* Load a gdt so I know what the segment registers are */ + lgdt gdt(%rip) + + /* load the data segments */ + movl $0x18, %eax /* data segment */ + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + movl %eax, %fs + movl %eax, %gs + + /* Setup a stack */ + leaq lstack_end(%rip), %rsp + + /* Call the C code */ + call purgatory + jmp entry64 + + .section ".rodata" + .balign 16 +gdt: /* 0x00 unusable segment + * 0x08 unused + * so use them as the gdt ptr + */ + .word gdt_end - gdt - 1 + .quad gdt + .word 0, 0, 0 + + /* 0x10 4GB flat code segment */ + .word 0xFFFF, 0x0000, 0x9A00, 0x00AF + + /* 0x18 4GB flat data segment */ + .word 0xFFFF, 0x0000, 0x9200, 0x00CF +gdt_end: + + .bss + .balign 4096 +lstack: + .skip 4096 +lstack_end: diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S new file mode 100644 index 000000000000..3cefba1fefc8 --- /dev/null +++ b/arch/x86/purgatory/stack.S @@ -0,0 +1,19 @@ +/* + * purgatory: stack + * + * Copyright (C) 2014 Red Hat Inc. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + /* A stack for the loaded kernel. + * Seperate and in the data section so it can be prepopulated. + */ + .data + .balign 4096 + .globl stack, stack_end + +stack: + .skip 4096 +stack_end: diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c new file mode 100644 index 000000000000..d886b1fa36f0 --- /dev/null +++ b/arch/x86/purgatory/string.c @@ -0,0 +1,13 @@ +/* + * Simple string functions. + * + * Copyright (C) 2014 Red Hat Inc. + * + * Author: + * Vivek Goyal + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include "../boot/string.c" -- cgit v1.2.2 From 12db5562e0352986a265841638482b84f3a6899b Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:04 -0700 Subject: kexec: load and relocate purgatory at kernel load time Load purgatory code in RAM and relocate it based on the location. Relocation code has been inspired by module relocation code and purgatory relocation code in kexec-tools. 
Also compute the checksums of loaded kexec segments and store them in purgatory. Arch independent code provides this functionality so that arch dependent bootloaders can make use of it. Helper functions are provided to get/set symbol values in purgatory which are used by bootloaders later to set things like stack and entry point of second kernel etc. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 2 + arch/x86/kernel/machine_kexec_64.c | 142 +++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 98fe3df6df82..9558b9fcafbf 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1583,6 +1583,8 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call" select BUILD_BIN2C + select CRYPTO + select CRYPTO_SHA256 ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index c8875b5545e1..88404c440727 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -6,6 +6,8 @@ * Version 2. See the file COPYING for more details. */ +#define pr_fmt(fmt) "kexec: " fmt + #include #include #include @@ -328,3 +330,143 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) return image->fops->cleanup(image); } + +/* + * Apply purgatory relocations. + * + * ehdr: Pointer to elf headers + * sechdrs: Pointer to section headers. + * relsec: section index of SHT_RELA section. + * + * TODO: Some of the code belongs to generic code. Move that in kexec.c. + */ +int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, + Elf64_Shdr *sechdrs, unsigned int relsec) +{ + unsigned int i; + Elf64_Rela *rel; + Elf64_Sym *sym; + void *location; + Elf64_Shdr *section, *symtabsec; + unsigned long address, sec_base, value; + const char *strtab, *name, *shstrtab; + + /* + * ->sh_offset has been modified to keep the pointer to section + * contents in memory + */ + rel = (void *)sechdrs[relsec].sh_offset; + + /* Section to which relocations apply */ + section = &sechdrs[sechdrs[relsec].sh_info]; + + pr_debug("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + + /* Associated symbol table */ + symtabsec = &sechdrs[sechdrs[relsec].sh_link]; + + /* String table */ + if (symtabsec->sh_link >= ehdr->e_shnum) { + /* Invalid strtab section number */ + pr_err("Invalid string table section index %d\n", + symtabsec->sh_link); + return -ENOEXEC; + } + + strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset; + + /* section header string table */ + shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset; + + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + + /* + * rel[i].r_offset contains byte offset from beginning + * of section to the storage unit affected. + * + * This is location to update (->sh_offset). This is temporary + * buffer where section is currently loaded. This will finally + * be loaded to a different address later, pointed to by + * ->sh_addr. kexec takes care of moving it + * (kexec_load_segment()). 
+ */ + location = (void *)(section->sh_offset + rel[i].r_offset); + + /* Final address of the location */ + address = section->sh_addr + rel[i].r_offset; + + /* + * rel[i].r_info contains information about symbol table index + * w.r.t which relocation must be made and type of relocation + * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get + * these respectively. + */ + sym = (Elf64_Sym *)symtabsec->sh_offset + + ELF64_R_SYM(rel[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n", + name, sym->st_info, sym->st_shndx, sym->st_value, + sym->st_size); + + if (sym->st_shndx == SHN_UNDEF) { + pr_err("Undefined symbol: %s\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx == SHN_COMMON) { + pr_err("symbol '%s' in common section\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx == SHN_ABS) + sec_base = 0; + else if (sym->st_shndx >= ehdr->e_shnum) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } else + sec_base = sechdrs[sym->st_shndx].sh_addr; + + value = sym->st_value; + value += sec_base; + value += rel[i].r_addend; + + switch (ELF64_R_TYPE(rel[i].r_info)) { + case R_X86_64_NONE: + break; + case R_X86_64_64: + *(u64 *)location = value; + break; + case R_X86_64_32: + *(u32 *)location = value; + if (value != *(u32 *)location) + goto overflow; + break; + case R_X86_64_32S: + *(s32 *)location = value; + if ((s64)value != *(s32 *)location) + goto overflow; + break; + case R_X86_64_PC32: + value -= (u64)address; + *(u32 *)location = value; + break; + default: + pr_err("Unknown rela relocation: %llu\n", + ELF64_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + } + return 0; + +overflow: + pr_err("Overflow in relocation type %d value 0x%lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), value); + return -ENOEXEC; +} -- cgit v1.2.2 From 27f48d3e633be23656a097baa3be336e04a82d84 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:06 -0700 Subject: kexec-bzImage64: support for loading bzImage using 64bit entry This is loader specific code which can load bzImage and set it up for 64bit entry. This does not take care of 32bit entry or real mode entry. 32bit mode entry can be implemented if somebody needs it. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. 
Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/kexec-bzimage64.h | 6 + arch/x86/include/asm/kexec.h | 21 ++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/kexec-bzimage64.c | 375 +++++++++++++++++++++++++++++++++ arch/x86/kernel/machine_kexec_64.c | 5 +- 5 files changed, 406 insertions(+), 2 deletions(-) create mode 100644 arch/x86/include/asm/kexec-bzimage64.h create mode 100644 arch/x86/kernel/kexec-bzimage64.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h new file mode 100644 index 000000000000..d1b5d194e31d --- /dev/null +++ b/arch/x86/include/asm/kexec-bzimage64.h @@ -0,0 +1,6 @@ +#ifndef _ASM_KEXEC_BZIMAGE64_H +#define _ASM_KEXEC_BZIMAGE64_H + +extern struct kexec_file_ops kexec_bzImage64_ops; + +#endif /* _ASM_KEXE_BZIMAGE64_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 17483a492f18..0dfccced4edf 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -23,6 +23,7 @@ #include #include +#include /* * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. @@ -161,6 +162,26 @@ struct kimage_arch { pmd_t *pmd; pte_t *pte; }; + +struct kexec_entry64_regs { + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t rbp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rip; +}; #endif typedef void crash_vmclear_fn(void); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bde3993624f1..b5ea75c4a4b4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -118,4 +118,5 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o obj-y += vsmp_64.o + obj-$(CONFIG_KEXEC) += kexec-bzimage64.o endif diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c new file mode 100644 index 000000000000..bcedd100192f --- /dev/null +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -0,0 +1,375 @@ +/* + * Kexec bzImage loader + * + * Copyright (C) 2014 Red Hat Inc. + * Authors: + * Vivek Goyal + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define pr_fmt(fmt) "kexec-bzImage64: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Defines lowest physical address for various segments. Not sure where + * exactly these limits came from. Current bzimage64 loader in kexec-tools + * uses these so I am retaining it. It can be changed over time as we gain + * more insight. + */ +#define MIN_PURGATORY_ADDR 0x3000 +#define MIN_BOOTPARAM_ADDR 0x3000 +#define MIN_KERNEL_LOAD_ADDR 0x100000 +#define MIN_INITRD_LOAD_ADDR 0x1000000 + +/* + * This is a place holder for all boot loader specific data structure which + * gets allocated in one call but gets freed much later during cleanup + * time. Right now there is only one field but it can grow as need be. + */ +struct bzimage64_data { + /* + * Temporary buffer to hold bootparams buffer. This should be + * freed once the bootparam segment has been loaded. 
+ */ + void *bootparams_buf; +}; + +static int setup_initrd(struct boot_params *params, + unsigned long initrd_load_addr, unsigned long initrd_len) +{ + params->hdr.ramdisk_image = initrd_load_addr & 0xffffffffUL; + params->hdr.ramdisk_size = initrd_len & 0xffffffffUL; + + params->ext_ramdisk_image = initrd_load_addr >> 32; + params->ext_ramdisk_size = initrd_len >> 32; + + return 0; +} + +static int setup_cmdline(struct boot_params *params, + unsigned long bootparams_load_addr, + unsigned long cmdline_offset, char *cmdline, + unsigned long cmdline_len) +{ + char *cmdline_ptr = ((char *)params) + cmdline_offset; + unsigned long cmdline_ptr_phys; + uint32_t cmdline_low_32, cmdline_ext_32; + + memcpy(cmdline_ptr, cmdline, cmdline_len); + cmdline_ptr[cmdline_len - 1] = '\0'; + + cmdline_ptr_phys = bootparams_load_addr + cmdline_offset; + cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL; + cmdline_ext_32 = cmdline_ptr_phys >> 32; + + params->hdr.cmd_line_ptr = cmdline_low_32; + if (cmdline_ext_32) + params->ext_cmd_line_ptr = cmdline_ext_32; + + return 0; +} + +static int setup_memory_map_entries(struct boot_params *params) +{ + unsigned int nr_e820_entries; + + nr_e820_entries = e820_saved.nr_map; + + /* TODO: Pass entries more than E820MAX in bootparams setup data */ + if (nr_e820_entries > E820MAX) + nr_e820_entries = E820MAX; + + params->e820_entries = nr_e820_entries; + memcpy(¶ms->e820_map, &e820_saved.map, + nr_e820_entries * sizeof(struct e820entry)); + + return 0; +} + +static int setup_boot_parameters(struct boot_params *params) +{ + unsigned int nr_e820_entries; + unsigned long long mem_k, start, end; + int i; + + /* Get subarch from existing bootparams */ + params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; + + /* Copying screen_info will do? 
*/ + memcpy(¶ms->screen_info, &boot_params.screen_info, + sizeof(struct screen_info)); + + /* Fill in memsize later */ + params->screen_info.ext_mem_k = 0; + params->alt_mem_k = 0; + + /* Default APM info */ + memset(¶ms->apm_bios_info, 0, sizeof(params->apm_bios_info)); + + /* Default drive info */ + memset(¶ms->hd0_info, 0, sizeof(params->hd0_info)); + memset(¶ms->hd1_info, 0, sizeof(params->hd1_info)); + + /* Default sysdesc table */ + params->sys_desc_table.length = 0; + + setup_memory_map_entries(params); + nr_e820_entries = params->e820_entries; + + for (i = 0; i < nr_e820_entries; i++) { + if (params->e820_map[i].type != E820_RAM) + continue; + start = params->e820_map[i].addr; + end = params->e820_map[i].addr + params->e820_map[i].size - 1; + + if ((start <= 0x100000) && end > 0x100000) { + mem_k = (end >> 10) - (0x100000 >> 10); + params->screen_info.ext_mem_k = mem_k; + params->alt_mem_k = mem_k; + if (mem_k > 0xfc00) + params->screen_info.ext_mem_k = 0xfc00; /* 64M*/ + if (mem_k > 0xffffffff) + params->alt_mem_k = 0xffffffff; + } + } + + /* Setup EDD info */ + memcpy(params->eddbuf, boot_params.eddbuf, + EDDMAXNR * sizeof(struct edd_info)); + params->eddbuf_entries = boot_params.eddbuf_entries; + + memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer, + EDD_MBR_SIG_MAX * sizeof(unsigned int)); + + return 0; +} + +int bzImage64_probe(const char *buf, unsigned long len) +{ + int ret = -ENOEXEC; + struct setup_header *header; + + /* kernel should be atleast two sectors long */ + if (len < 2 * 512) { + pr_err("File is too short to be a bzImage\n"); + return ret; + } + + header = (struct setup_header *)(buf + offsetof(struct boot_params, hdr)); + if (memcmp((char *)&header->header, "HdrS", 4) != 0) { + pr_err("Not a bzImage\n"); + return ret; + } + + if (header->boot_flag != 0xAA55) { + pr_err("No x86 boot sector present\n"); + return ret; + } + + if (header->version < 0x020C) { + pr_err("Must be at least protocol version 2.12\n"); + return ret; + } + + if (!(header->loadflags & LOADED_HIGH)) { + pr_err("zImage not a bzImage\n"); + return ret; + } + + if (!(header->xloadflags & XLF_KERNEL_64)) { + pr_err("Not a bzImage64. 
XLF_KERNEL_64 is not set.\n"); + return ret; + } + + if (!(header->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)) { + pr_err("XLF_CAN_BE_LOADED_ABOVE_4G is not set.\n"); + return ret; + } + + /* I've got a bzImage */ + pr_debug("It's a relocatable bzImage64\n"); + ret = 0; + + return ret; +} + +void *bzImage64_load(struct kimage *image, char *kernel, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + + struct setup_header *header; + int setup_sects, kern16_size, ret = 0; + unsigned long setup_header_size, params_cmdline_sz; + struct boot_params *params; + unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr; + unsigned long purgatory_load_addr; + unsigned long kernel_bufsz, kernel_memsz, kernel_align; + char *kernel_buf; + struct bzimage64_data *ldata; + struct kexec_entry64_regs regs64; + void *stack; + unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr); + + header = (struct setup_header *)(kernel + setup_hdr_offset); + setup_sects = header->setup_sects; + if (setup_sects == 0) + setup_sects = 4; + + kern16_size = (setup_sects + 1) * 512; + if (kernel_len < kern16_size) { + pr_err("bzImage truncated\n"); + return ERR_PTR(-ENOEXEC); + } + + if (cmdline_len > header->cmdline_size) { + pr_err("Kernel command line too long\n"); + return ERR_PTR(-EINVAL); + } + + /* + * Load purgatory. For 64bit entry point, purgatory code can be + * anywhere. + */ + ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1, + &purgatory_load_addr); + if (ret) { + pr_err("Loading purgatory failed\n"); + return ERR_PTR(ret); + } + + pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); + + /* Load Bootparams and cmdline */ + params_cmdline_sz = sizeof(struct boot_params) + cmdline_len; + params = kzalloc(params_cmdline_sz, GFP_KERNEL); + if (!params) + return ERR_PTR(-ENOMEM); + + /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ + setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; + + /* Is there a limit on setup header size? */ + memcpy(¶ms->hdr, (kernel + setup_hdr_offset), setup_header_size); + + ret = kexec_add_buffer(image, (char *)params, params_cmdline_sz, + params_cmdline_sz, 16, MIN_BOOTPARAM_ADDR, + ULONG_MAX, 1, &bootparam_load_addr); + if (ret) + goto out_free_params; + pr_debug("Loaded boot_param and command line at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + bootparam_load_addr, params_cmdline_sz, params_cmdline_sz); + + /* Load kernel */ + kernel_buf = kernel + kern16_size; + kernel_bufsz = kernel_len - kern16_size; + kernel_memsz = PAGE_ALIGN(header->init_size); + kernel_align = header->kernel_alignment; + + ret = kexec_add_buffer(image, kernel_buf, + kernel_bufsz, kernel_memsz, kernel_align, + MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1, + &kernel_load_addr); + if (ret) + goto out_free_params; + + pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + kernel_load_addr, kernel_memsz, kernel_memsz); + + /* Load initrd high */ + if (initrd) { + ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len, + PAGE_SIZE, MIN_INITRD_LOAD_ADDR, + ULONG_MAX, 1, &initrd_load_addr); + if (ret) + goto out_free_params; + + pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + initrd_load_addr, initrd_len, initrd_len); + + setup_initrd(params, initrd_load_addr, initrd_len); + } + + setup_cmdline(params, bootparam_load_addr, sizeof(struct boot_params), + cmdline, cmdline_len); + + /* bootloader info. Do we need a separate ID for kexec kernel loader? 
*/ + params->hdr.type_of_loader = 0x0D << 4; + params->hdr.loadflags = 0; + + /* Setup purgatory regs for entry */ + ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", ®s64, + sizeof(regs64), 1); + if (ret) + goto out_free_params; + + regs64.rbx = 0; /* Bootstrap Processor */ + regs64.rsi = bootparam_load_addr; + regs64.rip = kernel_load_addr + 0x200; + stack = kexec_purgatory_get_symbol_addr(image, "stack_end"); + if (IS_ERR(stack)) { + pr_err("Could not find address of symbol stack_end\n"); + ret = -EINVAL; + goto out_free_params; + } + + regs64.rsp = (unsigned long)stack; + ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", ®s64, + sizeof(regs64), 0); + if (ret) + goto out_free_params; + + setup_boot_parameters(params); + + /* Allocate loader specific data */ + ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL); + if (!ldata) { + ret = -ENOMEM; + goto out_free_params; + } + + /* + * Store pointer to params so that it could be freed after loading + * params segment has been loaded and contents have been copied + * somewhere else. + */ + ldata->bootparams_buf = params; + return ldata; + +out_free_params: + kfree(params); + return ERR_PTR(ret); +} + +/* This cleanup function is called after various segments have been loaded */ +int bzImage64_cleanup(void *loader_data) +{ + struct bzimage64_data *ldata = loader_data; + + if (!ldata) + return 0; + + kfree(ldata->bootparams_buf); + ldata->bootparams_buf = NULL; + + return 0; +} + +struct kexec_file_ops kexec_bzImage64_ops = { + .probe = bzImage64_probe, + .load = bzImage64_load, + .cleanup = bzImage64_cleanup, +}; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 88404c440727..18d0f9e0b6da 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -23,9 +23,10 @@ #include #include #include +#include static struct kexec_file_ops *kexec_file_loaders[] = { - NULL, + &kexec_bzImage64_ops, }; static void free_transition_pgtable(struct kimage *image) @@ -328,7 +329,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) if (!image->fops || !image->fops->cleanup) return 0; - return image->fops->cleanup(image); + return image->fops->cleanup(image->image_loader_data); } /* -- cgit v1.2.2 From dd5f726076cc7639d9713b334c8c133f77c6757a Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:09 -0700 Subject: kexec: support for kexec on panic using new system call This patch adds support for loading a kexec on panic (kdump) kernel usning new system call. It prepares ELF headers for memory areas to be dumped and for saved cpu registers. Also prepares the memory map for second kernel and limits its boot to reserved areas only. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. 
Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/crash.h | 9 + arch/x86/include/asm/kexec.h | 30 +- arch/x86/kernel/crash.c | 563 +++++++++++++++++++++++++++++++++++++ arch/x86/kernel/kexec-bzimage64.c | 55 +++- arch/x86/kernel/machine_kexec_64.c | 40 +++ arch/x86/purgatory/entry64.S | 6 +- 6 files changed, 687 insertions(+), 16 deletions(-) create mode 100644 arch/x86/include/asm/crash.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h new file mode 100644 index 000000000000..f498411f2500 --- /dev/null +++ b/arch/x86/include/asm/crash.h @@ -0,0 +1,9 @@ +#ifndef _ASM_X86_CRASH_H +#define _ASM_X86_CRASH_H + +int crash_load_segments(struct kimage *image); +int crash_copy_backup_region(struct kimage *image); +int crash_setup_memmap_entries(struct kimage *image, + struct boot_params *params); + +#endif /* _ASM_X86_CRASH_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 0dfccced4edf..d2434c1cad05 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -25,6 +25,8 @@ #include #include +struct kimage; + /* * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. * I.e. Maximum page that is mapped directly into kernel memory, @@ -62,6 +64,10 @@ # define KEXEC_ARCH KEXEC_ARCH_X86_64 #endif +/* Memory to backup during crash kdump */ +#define KEXEC_BACKUP_SRC_START (0UL) +#define KEXEC_BACKUP_SRC_END (640 * 1024UL) /* 640K */ + /* * CPU does not save ss and sp on stack if execution is already * running in kernel mode at the time of NMI occurrence. This code @@ -161,17 +167,35 @@ struct kimage_arch { pud_t *pud; pmd_t *pmd; pte_t *pte; + /* Details of backup region */ + unsigned long backup_src_start; + unsigned long backup_src_sz; + + /* Physical address of backup segment */ + unsigned long backup_load_addr; + + /* Core ELF header buffer */ + void *elf_headers; + unsigned long elf_headers_sz; + unsigned long elf_load_addr; }; +#endif /* CONFIG_X86_32 */ +#ifdef CONFIG_X86_64 +/* + * Number of elements and order of elements in this structure should match + * with the ones in arch/x86/purgatory/entry64.S. If you make a change here + * make an appropriate change in purgatory too. + */ struct kexec_entry64_regs { uint64_t rax; - uint64_t rbx; uint64_t rcx; uint64_t rdx; - uint64_t rsi; - uint64_t rdi; + uint64_t rbx; uint64_t rsp; uint64_t rbp; + uint64_t rsi; + uint64_t rdi; uint64_t r8; uint64_t r9; uint64_t r10; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 507de8066594..0553a34fa0df 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -4,9 +4,14 @@ * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) * * Copyright (C) IBM Corporation, 2004. All rights reserved. + * Copyright (C) Red Hat Inc., 2014. All rights reserved. 
+ * Authors: + * Vivek Goyal * */ +#define pr_fmt(fmt) "kexec: " fmt + #include #include #include @@ -16,6 +21,7 @@ #include #include #include +#include #include #include @@ -28,6 +34,45 @@ #include #include +/* Alignment required for elf header segment */ +#define ELF_CORE_HEADER_ALIGN 4096 + +/* This primarily represents number of split ranges due to exclusion */ +#define CRASH_MAX_RANGES 16 + +struct crash_mem_range { + u64 start, end; +}; + +struct crash_mem { + unsigned int nr_ranges; + struct crash_mem_range ranges[CRASH_MAX_RANGES]; +}; + +/* Misc data about ram ranges needed to prepare elf headers */ +struct crash_elf_data { + struct kimage *image; + /* + * Total number of ram ranges we have after various adjustments for + * GART, crash reserved region etc. + */ + unsigned int max_nr_ranges; + unsigned long gart_start, gart_end; + + /* Pointer to elf header */ + void *ehdr; + /* Pointer to next phdr */ + void *bufp; + struct crash_mem mem; +}; + +/* Used while preparing memory map entries for second kernel */ +struct crash_memmap_data { + struct boot_params *params; + /* Type of memory */ + unsigned int type; +}; + int in_crash_kexec; /* @@ -39,6 +84,7 @@ int in_crash_kexec; */ crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL; EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); +unsigned long crash_zero_bytes; static inline void cpu_crash_vmclear_loaded_vmcss(void) { @@ -135,3 +181,520 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #endif crash_save_cpu(regs, safe_smp_processor_id()); } + +#ifdef CONFIG_X86_64 + +static int get_nr_ram_ranges_callback(unsigned long start_pfn, + unsigned long nr_pfn, void *arg) +{ + int *nr_ranges = arg; + + (*nr_ranges)++; + return 0; +} + +static int get_gart_ranges_callback(u64 start, u64 end, void *arg) +{ + struct crash_elf_data *ced = arg; + + ced->gart_start = start; + ced->gart_end = end; + + /* Not expecting more than 1 gart aperture */ + return 1; +} + + +/* Gather all the required information to prepare elf headers for ram regions */ +static void fill_up_crash_elf_data(struct crash_elf_data *ced, + struct kimage *image) +{ + unsigned int nr_ranges = 0; + + ced->image = image; + + walk_system_ram_range(0, -1, &nr_ranges, + get_nr_ram_ranges_callback); + + ced->max_nr_ranges = nr_ranges; + + /* + * We don't create ELF headers for GART aperture as an attempt + * to dump this memory in second kernel leads to hang/crash. + * If gart aperture is present, one needs to exclude that region + * and that could lead to need of extra phdr. + */ + walk_iomem_res("GART", IORESOURCE_MEM, 0, -1, + ced, get_gart_ranges_callback); + + /* + * If we have gart region, excluding that could potentially split + * a memory range, resulting in extra header. Account for that. 
+ */ + if (ced->gart_end) + ced->max_nr_ranges++; + + /* Exclusion of crash region could split memory ranges */ + ced->max_nr_ranges++; + + /* If crashk_low_res is not 0, another range split possible */ + if (crashk_low_res.end != 0) + ced->max_nr_ranges++; +} + +static int exclude_mem_range(struct crash_mem *mem, + unsigned long long mstart, unsigned long long mend) +{ + int i, j; + unsigned long long start, end; + struct crash_mem_range temp_range = {0, 0}; + + for (i = 0; i < mem->nr_ranges; i++) { + start = mem->ranges[i].start; + end = mem->ranges[i].end; + + if (mstart > end || mend < start) + continue; + + /* Truncate any area outside of range */ + if (mstart < start) + mstart = start; + if (mend > end) + mend = end; + + /* Found completely overlapping range */ + if (mstart == start && mend == end) { + mem->ranges[i].start = 0; + mem->ranges[i].end = 0; + if (i < mem->nr_ranges - 1) { + /* Shift rest of the ranges to left */ + for (j = i; j < mem->nr_ranges - 1; j++) { + mem->ranges[j].start = + mem->ranges[j+1].start; + mem->ranges[j].end = + mem->ranges[j+1].end; + } + } + mem->nr_ranges--; + return 0; + } + + if (mstart > start && mend < end) { + /* Split original range */ + mem->ranges[i].end = mstart - 1; + temp_range.start = mend + 1; + temp_range.end = end; + } else if (mstart != start) + mem->ranges[i].end = mstart - 1; + else + mem->ranges[i].start = mend + 1; + break; + } + + /* If a split happend, add the split to array */ + if (!temp_range.end) + return 0; + + /* Split happened */ + if (i == CRASH_MAX_RANGES - 1) { + pr_err("Too many crash ranges after split\n"); + return -ENOMEM; + } + + /* Location where new range should go */ + j = i + 1; + if (j < mem->nr_ranges) { + /* Move over all ranges one slot towards the end */ + for (i = mem->nr_ranges - 1; i >= j; i--) + mem->ranges[i + 1] = mem->ranges[i]; + } + + mem->ranges[j].start = temp_range.start; + mem->ranges[j].end = temp_range.end; + mem->nr_ranges++; + return 0; +} + +/* + * Look for any unwanted ranges between mstart, mend and remove them. 
This + * might lead to split and split ranges are put in ced->mem.ranges[] array + */ +static int elf_header_exclude_ranges(struct crash_elf_data *ced, + unsigned long long mstart, unsigned long long mend) +{ + struct crash_mem *cmem = &ced->mem; + int ret = 0; + + memset(cmem->ranges, 0, sizeof(cmem->ranges)); + + cmem->ranges[0].start = mstart; + cmem->ranges[0].end = mend; + cmem->nr_ranges = 1; + + /* Exclude crashkernel region */ + ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (ret) + return ret; + + ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + return ret; + + /* Exclude GART region */ + if (ced->gart_end) { + ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end); + if (ret) + return ret; + } + + return ret; +} + +static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg) +{ + struct crash_elf_data *ced = arg; + Elf64_Ehdr *ehdr; + Elf64_Phdr *phdr; + unsigned long mstart, mend; + struct kimage *image = ced->image; + struct crash_mem *cmem; + int ret, i; + + ehdr = ced->ehdr; + + /* Exclude unwanted mem ranges */ + ret = elf_header_exclude_ranges(ced, start, end); + if (ret) + return ret; + + /* Go through all the ranges in ced->mem.ranges[] and prepare phdr */ + cmem = &ced->mem; + + for (i = 0; i < cmem->nr_ranges; i++) { + mstart = cmem->ranges[i].start; + mend = cmem->ranges[i].end; + + phdr = ced->bufp; + ced->bufp += sizeof(Elf64_Phdr); + + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_offset = mstart; + + /* + * If a range matches backup region, adjust offset to backup + * segment. + */ + if (mstart == image->arch.backup_src_start && + (mend - mstart + 1) == image->arch.backup_src_sz) + phdr->p_offset = image->arch.backup_load_addr; + + phdr->p_paddr = mstart; + phdr->p_vaddr = (unsigned long long) __va(mstart); + phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; + phdr->p_align = 0; + ehdr->e_phnum++; + pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", + phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, + ehdr->e_phnum, phdr->p_offset); + } + + return ret; +} + +static int prepare_elf64_headers(struct crash_elf_data *ced, + void **addr, unsigned long *sz) +{ + Elf64_Ehdr *ehdr; + Elf64_Phdr *phdr; + unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz; + unsigned char *buf, *bufp; + unsigned int cpu; + unsigned long long notes_addr; + int ret; + + /* extra phdr for vmcoreinfo elf note */ + nr_phdr = nr_cpus + 1; + nr_phdr += ced->max_nr_ranges; + + /* + * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping + * area on x86_64 (ffffffff80000000 - ffffffffa0000000). + * I think this is required by tools like gdb. So same physical + * memory will be mapped in two elf headers. One will contain kernel + * text virtual addresses and other will have __va(physical) addresses. 
+ */ + + nr_phdr++; + elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr); + elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN); + + buf = vzalloc(elf_sz); + if (!buf) + return -ENOMEM; + + bufp = buf; + ehdr = (Elf64_Ehdr *)bufp; + bufp += sizeof(Elf64_Ehdr); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2LSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + ehdr->e_ident[EI_OSABI] = ELF_OSABI; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = ELF_ARCH; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + + /* Prepare one phdr of type PT_NOTE for each present cpu */ + for_each_present_cpu(cpu) { + phdr = (Elf64_Phdr *)bufp; + bufp += sizeof(Elf64_Phdr); + phdr->p_type = PT_NOTE; + notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu)); + phdr->p_offset = phdr->p_paddr = notes_addr; + phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t); + (ehdr->e_phnum)++; + } + + /* Prepare one PT_NOTE header for vmcoreinfo */ + phdr = (Elf64_Phdr *)bufp; + bufp += sizeof(Elf64_Phdr); + phdr->p_type = PT_NOTE; + phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note(); + phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note); + (ehdr->e_phnum)++; + +#ifdef CONFIG_X86_64 + /* Prepare PT_LOAD type program header for kernel text region */ + phdr = (Elf64_Phdr *)bufp; + bufp += sizeof(Elf64_Phdr); + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_vaddr = (Elf64_Addr)_text; + phdr->p_filesz = phdr->p_memsz = _end - _text; + phdr->p_offset = phdr->p_paddr = __pa_symbol(_text); + (ehdr->e_phnum)++; +#endif + + /* Prepare PT_LOAD headers for system ram chunks. */ + ced->ehdr = ehdr; + ced->bufp = bufp; + ret = walk_system_ram_res(0, -1, ced, + prepare_elf64_ram_headers_callback); + if (ret < 0) + return ret; + + *addr = buf; + *sz = elf_sz; + return 0; +} + +/* Prepare elf headers. 
Return addr and size */ +static int prepare_elf_headers(struct kimage *image, void **addr, + unsigned long *sz) +{ + struct crash_elf_data *ced; + int ret; + + ced = kzalloc(sizeof(*ced), GFP_KERNEL); + if (!ced) + return -ENOMEM; + + fill_up_crash_elf_data(ced, image); + + /* By default prepare 64bit headers */ + ret = prepare_elf64_headers(ced, addr, sz); + kfree(ced); + return ret; +} + +static int add_e820_entry(struct boot_params *params, struct e820entry *entry) +{ + unsigned int nr_e820_entries; + + nr_e820_entries = params->e820_entries; + if (nr_e820_entries >= E820MAX) + return 1; + + memcpy(¶ms->e820_map[nr_e820_entries], entry, + sizeof(struct e820entry)); + params->e820_entries++; + return 0; +} + +static int memmap_entry_callback(u64 start, u64 end, void *arg) +{ + struct crash_memmap_data *cmd = arg; + struct boot_params *params = cmd->params; + struct e820entry ei; + + ei.addr = start; + ei.size = end - start + 1; + ei.type = cmd->type; + add_e820_entry(params, &ei); + + return 0; +} + +static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem, + unsigned long long mstart, + unsigned long long mend) +{ + unsigned long start, end; + int ret = 0; + + cmem->ranges[0].start = mstart; + cmem->ranges[0].end = mend; + cmem->nr_ranges = 1; + + /* Exclude Backup region */ + start = image->arch.backup_load_addr; + end = start + image->arch.backup_src_sz - 1; + ret = exclude_mem_range(cmem, start, end); + if (ret) + return ret; + + /* Exclude elf header region */ + start = image->arch.elf_load_addr; + end = start + image->arch.elf_headers_sz - 1; + return exclude_mem_range(cmem, start, end); +} + +/* Prepare memory map for crash dump kernel */ +int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) +{ + int i, ret = 0; + unsigned long flags; + struct e820entry ei; + struct crash_memmap_data cmd; + struct crash_mem *cmem; + + cmem = vzalloc(sizeof(struct crash_mem)); + if (!cmem) + return -ENOMEM; + + memset(&cmd, 0, sizeof(struct crash_memmap_data)); + cmd.params = params; + + /* Add first 640K segment */ + ei.addr = image->arch.backup_src_start; + ei.size = image->arch.backup_src_sz; + ei.type = E820_RAM; + add_e820_entry(params, &ei); + + /* Add ACPI tables */ + cmd.type = E820_ACPI; + flags = IORESOURCE_MEM | IORESOURCE_BUSY; + walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd, + memmap_entry_callback); + + /* Add ACPI Non-volatile Storage */ + cmd.type = E820_NVS; + walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd, + memmap_entry_callback); + + /* Add crashk_low_res region */ + if (crashk_low_res.end) { + ei.addr = crashk_low_res.start; + ei.size = crashk_low_res.end - crashk_low_res.start + 1; + ei.type = E820_RAM; + add_e820_entry(params, &ei); + } + + /* Exclude some ranges from crashk_res and add rest to memmap */ + ret = memmap_exclude_ranges(image, cmem, crashk_res.start, + crashk_res.end); + if (ret) + goto out; + + for (i = 0; i < cmem->nr_ranges; i++) { + ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1; + + /* If entry is less than a page, skip it */ + if (ei.size < PAGE_SIZE) + continue; + ei.addr = cmem->ranges[i].start; + ei.type = E820_RAM; + add_e820_entry(params, &ei); + } + +out: + vfree(cmem); + return ret; +} + +static int determine_backup_region(u64 start, u64 end, void *arg) +{ + struct kimage *image = arg; + + image->arch.backup_src_start = start; + image->arch.backup_src_sz = end - start + 1; + + /* Expecting only one range for backup region */ + return 1; +} + +int 
crash_load_segments(struct kimage *image) +{ + unsigned long src_start, src_sz, elf_sz; + void *elf_addr; + int ret; + + /* + * Determine and load a segment for backup area. First 640K RAM + * region is backup source + */ + + ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END, + image, determine_backup_region); + + /* Zero or postive return values are ok */ + if (ret < 0) + return ret; + + src_start = image->arch.backup_src_start; + src_sz = image->arch.backup_src_sz; + + /* Add backup segment. */ + if (src_sz) { + /* + * Ideally there is no source for backup segment. This is + * copied in purgatory after crash. Just add a zero filled + * segment for now to make sure checksum logic works fine. + */ + ret = kexec_add_buffer(image, (char *)&crash_zero_bytes, + sizeof(crash_zero_bytes), src_sz, + PAGE_SIZE, 0, -1, 0, + &image->arch.backup_load_addr); + if (ret) + return ret; + pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n", + image->arch.backup_load_addr, src_start, src_sz); + } + + /* Prepare elf headers and add a segment */ + ret = prepare_elf_headers(image, &elf_addr, &elf_sz); + if (ret) + return ret; + + image->arch.elf_headers = elf_addr; + image->arch.elf_headers_sz = elf_sz; + + ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz, + ELF_CORE_HEADER_ALIGN, 0, -1, 0, + &image->arch.elf_load_addr); + if (ret) { + vfree((void *)image->arch.elf_headers); + return ret; + } + pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->arch.elf_load_addr, elf_sz, elf_sz); + + return ret; +} + +#endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index bcedd100192f..a8e646458a10 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -21,6 +21,9 @@ #include #include +#include + +#define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */ /* * Defines lowest physical address for various segments. 
Not sure where @@ -58,18 +61,24 @@ static int setup_initrd(struct boot_params *params, return 0; } -static int setup_cmdline(struct boot_params *params, +static int setup_cmdline(struct kimage *image, struct boot_params *params, unsigned long bootparams_load_addr, unsigned long cmdline_offset, char *cmdline, unsigned long cmdline_len) { char *cmdline_ptr = ((char *)params) + cmdline_offset; - unsigned long cmdline_ptr_phys; + unsigned long cmdline_ptr_phys, len; uint32_t cmdline_low_32, cmdline_ext_32; memcpy(cmdline_ptr, cmdline, cmdline_len); + if (image->type == KEXEC_TYPE_CRASH) { + len = sprintf(cmdline_ptr + cmdline_len - 1, + " elfcorehdr=0x%lx", image->arch.elf_load_addr); + cmdline_len += len; + } cmdline_ptr[cmdline_len - 1] = '\0'; + pr_debug("Final command line is: %s\n", cmdline_ptr); cmdline_ptr_phys = bootparams_load_addr + cmdline_offset; cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL; cmdline_ext_32 = cmdline_ptr_phys >> 32; @@ -98,11 +107,12 @@ static int setup_memory_map_entries(struct boot_params *params) return 0; } -static int setup_boot_parameters(struct boot_params *params) +static int setup_boot_parameters(struct kimage *image, + struct boot_params *params) { unsigned int nr_e820_entries; unsigned long long mem_k, start, end; - int i; + int i, ret = 0; /* Get subarch from existing bootparams */ params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; @@ -125,7 +135,13 @@ static int setup_boot_parameters(struct boot_params *params) /* Default sysdesc table */ params->sys_desc_table.length = 0; - setup_memory_map_entries(params); + if (image->type == KEXEC_TYPE_CRASH) { + ret = crash_setup_memmap_entries(image, params); + if (ret) + return ret; + } else + setup_memory_map_entries(params); + nr_e820_entries = params->e820_entries; for (i = 0; i < nr_e820_entries; i++) { @@ -153,7 +169,7 @@ static int setup_boot_parameters(struct boot_params *params) memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer, EDD_MBR_SIG_MAX * sizeof(unsigned int)); - return 0; + return ret; } int bzImage64_probe(const char *buf, unsigned long len) @@ -240,6 +256,22 @@ void *bzImage64_load(struct kimage *image, char *kernel, return ERR_PTR(-EINVAL); } + /* + * In case of crash dump, we will append elfcorehdr= to + * command line. Make sure it does not overflow + */ + if (cmdline_len + MAX_ELFCOREHDR_STR_LEN > header->cmdline_size) { + pr_debug("Appending elfcorehdr= to command line exceeds maximum allowed length\n"); + return ERR_PTR(-EINVAL); + } + + /* Allocate and load backup region */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = crash_load_segments(image); + if (ret) + return ERR_PTR(ret); + } + /* * Load purgatory. For 64bit entry point, purgatory code can be * anywhere. @@ -254,7 +286,8 @@ void *bzImage64_load(struct kimage *image, char *kernel, pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); /* Load Bootparams and cmdline */ - params_cmdline_sz = sizeof(struct boot_params) + cmdline_len; + params_cmdline_sz = sizeof(struct boot_params) + cmdline_len + + MAX_ELFCOREHDR_STR_LEN; params = kzalloc(params_cmdline_sz, GFP_KERNEL); if (!params) return ERR_PTR(-ENOMEM); @@ -303,8 +336,8 @@ void *bzImage64_load(struct kimage *image, char *kernel, setup_initrd(params, initrd_load_addr, initrd_len); } - setup_cmdline(params, bootparam_load_addr, sizeof(struct boot_params), - cmdline, cmdline_len); + setup_cmdline(image, params, bootparam_load_addr, + sizeof(struct boot_params), cmdline, cmdline_len); /* bootloader info. 
Do we need a separate ID for kexec kernel loader? */ params->hdr.type_of_loader = 0x0D << 4; @@ -332,7 +365,9 @@ void *bzImage64_load(struct kimage *image, char *kernel, if (ret) goto out_free_params; - setup_boot_parameters(params); + ret = setup_boot_parameters(image, params); + if (ret) + goto out_free_params; /* Allocate loader specific data */ ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 18d0f9e0b6da..9330434da777 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -178,6 +178,38 @@ static void load_segments(void) ); } +/* Update purgatory as needed after various image segments have been prepared */ +static int arch_update_purgatory(struct kimage *image) +{ + int ret = 0; + + if (!image->file_mode) + return 0; + + /* Setup copying of backup region */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = kexec_purgatory_get_set_symbol(image, "backup_dest", + &image->arch.backup_load_addr, + sizeof(image->arch.backup_load_addr), 0); + if (ret) + return ret; + + ret = kexec_purgatory_get_set_symbol(image, "backup_src", + &image->arch.backup_src_start, + sizeof(image->arch.backup_src_start), 0); + if (ret) + return ret; + + ret = kexec_purgatory_get_set_symbol(image, "backup_sz", + &image->arch.backup_src_sz, + sizeof(image->arch.backup_src_sz), 0); + if (ret) + return ret; + } + + return ret; +} + int machine_kexec_prepare(struct kimage *image) { unsigned long start_pgtable; @@ -191,6 +223,11 @@ int machine_kexec_prepare(struct kimage *image) if (result) return result; + /* update purgatory as needed */ + result = arch_update_purgatory(image); + if (result) + return result; + return 0; } @@ -315,6 +352,9 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, void *arch_kexec_kernel_image_load(struct kimage *image) { + vfree(image->arch.elf_headers); + image->arch.elf_headers = NULL; + if (!image->fops || !image->fops->load) return ERR_PTR(-ENOEXEC); diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S index be3249d7ed2d..d1a4291d3568 100644 --- a/arch/x86/purgatory/entry64.S +++ b/arch/x86/purgatory/entry64.S @@ -61,13 +61,13 @@ new_cs_exit: .balign 4 entry64_regs: rax: .quad 0x0 -rbx: .quad 0x0 rcx: .quad 0x0 rdx: .quad 0x0 -rsi: .quad 0x0 -rdi: .quad 0x0 +rbx: .quad 0x0 rsp: .quad 0x0 rbp: .quad 0x0 +rsi: .quad 0x0 +rdi: .quad 0x0 r8: .quad 0x0 r9: .quad 0x0 r10: .quad 0x0 -- cgit v1.2.2 From 6a2c20e7d8900ed273dc34a9af9bf02fc478e427 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:11 -0700 Subject: kexec: support kexec/kdump on EFI systems This patch does two things. It passes EFI run time mappings to second kernel in bootparams efi_info. Second kernel parse this info and create new mappings in second kernel. That means mappings in first and second kernel will be same. This paves the way to enable EFI in kexec kernel. This patch also prepares and passes EFI setup data through bootparams. This contains bunch of information about various tables and their addresses. These information gathering and passing has been written along the lines of what current kexec-tools is doing to make kexec work with UEFI. [akpm@linux-foundation.org: s/get_efi/efi_get/g, per Matt] Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. 
Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Cc: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/kexec-bzimage64.c | 146 ++++++++++++++++++++++++++++++++++---- 1 file changed, 134 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index a8e646458a10..623e6c58081f 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -18,10 +18,12 @@ #include #include #include +#include #include #include #include +#include #define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */ @@ -90,7 +92,7 @@ static int setup_cmdline(struct kimage *image, struct boot_params *params, return 0; } -static int setup_memory_map_entries(struct boot_params *params) +static int setup_e820_entries(struct boot_params *params) { unsigned int nr_e820_entries; @@ -107,8 +109,93 @@ static int setup_memory_map_entries(struct boot_params *params) return 0; } -static int setup_boot_parameters(struct kimage *image, - struct boot_params *params) +#ifdef CONFIG_EFI +static int setup_efi_info_memmap(struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_map_offset, + unsigned int efi_map_sz) +{ + void *efi_map = (void *)params + efi_map_offset; + unsigned long efi_map_phys_addr = params_load_addr + efi_map_offset; + struct efi_info *ei = ¶ms->efi_info; + + if (!efi_map_sz) + return 0; + + efi_runtime_map_copy(efi_map, efi_map_sz); + + ei->efi_memmap = efi_map_phys_addr & 0xffffffff; + ei->efi_memmap_hi = efi_map_phys_addr >> 32; + ei->efi_memmap_size = efi_map_sz; + + return 0; +} + +static int +prepare_add_efi_setup_data(struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_setup_data_offset) +{ + unsigned long setup_data_phys; + struct setup_data *sd = (void *)params + efi_setup_data_offset; + struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data); + + esd->fw_vendor = efi.fw_vendor; + esd->runtime = efi.runtime; + esd->tables = efi.config_table; + esd->smbios = efi.smbios; + + sd->type = SETUP_EFI; + sd->len = sizeof(struct efi_setup_data); + + /* Add setup data */ + setup_data_phys = params_load_addr + efi_setup_data_offset; + sd->next = params->hdr.setup_data; + params->hdr.setup_data = setup_data_phys; + + return 0; +} + +static int +setup_efi_state(struct boot_params *params, unsigned long params_load_addr, + unsigned int efi_map_offset, unsigned int efi_map_sz, + unsigned int efi_setup_data_offset) +{ + struct efi_info *current_ei = &boot_params.efi_info; + struct efi_info *ei = ¶ms->efi_info; + + if (!current_ei->efi_memmap_size) + return 0; + + /* + * If 1:1 mapping is not enabled, second kernel can not setup EFI + * and use EFI run time services. User space will have to pass + * acpi_rsdp= on kernel command line to make second kernel boot + * without efi. 
+ */ + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + ei->efi_loader_signature = current_ei->efi_loader_signature; + ei->efi_systab = current_ei->efi_systab; + ei->efi_systab_hi = current_ei->efi_systab_hi; + + ei->efi_memdesc_version = current_ei->efi_memdesc_version; + ei->efi_memdesc_size = efi_get_runtime_map_desc_size(); + + setup_efi_info_memmap(params, params_load_addr, efi_map_offset, + efi_map_sz); + prepare_add_efi_setup_data(params, params_load_addr, + efi_setup_data_offset); + return 0; +} +#endif /* CONFIG_EFI */ + +static int +setup_boot_parameters(struct kimage *image, struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_map_offset, unsigned int efi_map_sz, + unsigned int efi_setup_data_offset) { unsigned int nr_e820_entries; unsigned long long mem_k, start, end; @@ -140,7 +227,7 @@ static int setup_boot_parameters(struct kimage *image, if (ret) return ret; } else - setup_memory_map_entries(params); + setup_e820_entries(params); nr_e820_entries = params->e820_entries; @@ -161,6 +248,12 @@ static int setup_boot_parameters(struct kimage *image, } } +#ifdef CONFIG_EFI + /* Setup EFI state */ + setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz, + efi_setup_data_offset); +#endif + /* Setup EDD info */ memcpy(params->eddbuf, boot_params.eddbuf, EDDMAXNR * sizeof(struct edd_info)); @@ -214,6 +307,15 @@ int bzImage64_probe(const char *buf, unsigned long len) return ret; } + /* + * Can't handle 32bit EFI as it does not allow loading kernel + * above 4G. This should be handled by 32bit bzImage loader + */ + if (efi_enabled(EFI_RUNTIME_SERVICES) && !efi_enabled(EFI_64BIT)) { + pr_debug("EFI is 32 bit. Can't load kernel above 4G.\n"); + return ret; + } + /* I've got a bzImage */ pr_debug("It's a relocatable bzImage64\n"); ret = 0; @@ -229,7 +331,7 @@ void *bzImage64_load(struct kimage *image, char *kernel, struct setup_header *header; int setup_sects, kern16_size, ret = 0; - unsigned long setup_header_size, params_cmdline_sz; + unsigned long setup_header_size, params_cmdline_sz, params_misc_sz; struct boot_params *params; unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr; unsigned long purgatory_load_addr; @@ -239,6 +341,7 @@ void *bzImage64_load(struct kimage *image, char *kernel, struct kexec_entry64_regs regs64; void *stack; unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr); + unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset; header = (struct setup_header *)(kernel + setup_hdr_offset); setup_sects = header->setup_sects; @@ -285,12 +388,29 @@ void *bzImage64_load(struct kimage *image, char *kernel, pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); - /* Load Bootparams and cmdline */ + + /* + * Load Bootparams and cmdline and space for efi stuff. + * + * Allocate memory together for multiple data structures so + * that they all can go in single area/segment and we don't + * have to create separate segment for each. 
Keeps things + * little bit simple + */ + efi_map_sz = efi_get_runtime_map_size(); + efi_map_sz = ALIGN(efi_map_sz, 16); params_cmdline_sz = sizeof(struct boot_params) + cmdline_len + MAX_ELFCOREHDR_STR_LEN; - params = kzalloc(params_cmdline_sz, GFP_KERNEL); + params_cmdline_sz = ALIGN(params_cmdline_sz, 16); + params_misc_sz = params_cmdline_sz + efi_map_sz + + sizeof(struct setup_data) + + sizeof(struct efi_setup_data); + + params = kzalloc(params_misc_sz, GFP_KERNEL); if (!params) return ERR_PTR(-ENOMEM); + efi_map_offset = params_cmdline_sz; + efi_setup_data_offset = efi_map_offset + efi_map_sz; /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; @@ -298,13 +418,13 @@ void *bzImage64_load(struct kimage *image, char *kernel, /* Is there a limit on setup header size? */ memcpy(¶ms->hdr, (kernel + setup_hdr_offset), setup_header_size); - ret = kexec_add_buffer(image, (char *)params, params_cmdline_sz, - params_cmdline_sz, 16, MIN_BOOTPARAM_ADDR, + ret = kexec_add_buffer(image, (char *)params, params_misc_sz, + params_misc_sz, 16, MIN_BOOTPARAM_ADDR, ULONG_MAX, 1, &bootparam_load_addr); if (ret) goto out_free_params; - pr_debug("Loaded boot_param and command line at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - bootparam_load_addr, params_cmdline_sz, params_cmdline_sz); + pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + bootparam_load_addr, params_misc_sz, params_misc_sz); /* Load kernel */ kernel_buf = kernel + kern16_size; @@ -365,7 +485,9 @@ void *bzImage64_load(struct kimage *image, char *kernel, if (ret) goto out_free_params; - ret = setup_boot_parameters(image, params); + ret = setup_boot_parameters(image, params, bootparam_load_addr, + efi_map_offset, efi_map_sz, + efi_setup_data_offset); if (ret) goto out_free_params; -- cgit v1.2.2 From 8e7d838103feac320baf9e68d73f954840ac1eea Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:13 -0700 Subject: kexec: verify the signature of signed PE bzImage This is the final piece of the puzzle of verifying kernel image signature during kexec_file_load() syscall. This patch calls into PE file routines to verify signature of bzImage. If signature are valid, kexec_file_load() succeeds otherwise it fails. Two new config options have been introduced. First one is CONFIG_KEXEC_VERIFY_SIG. This option enforces that kernel has to be validly signed otherwise kernel load will fail. If this option is not set, no signature verification will be done. Only exception will be when secureboot is enabled. In that case signature verification should be automatically enforced when secureboot is enabled. But that will happen when secureboot patches are merged. Second config option is CONFIG_KEXEC_BZIMAGE_VERIFY_SIG. This option enables signature verification support on bzImage. If this option is not set and previous one is set, kernel image loading will fail because kernel does not have support to verify signature of bzImage. I tested these patches with both "pesign" and "sbsign" signed bzImages. I used signing_key.priv key and signing_key.x509 cert for signing as generated during kernel build process (if module signing is enabled). Used following method to sign bzImage. 
pesign ====== - Convert DER format cert to PEM format cert openssl x509 -in signing_key.x509 -inform DER -out signing_key.x509.PEM -outform PEM - Generate a .p12 file from existing cert and private key file openssl pkcs12 -export -out kernel-key.p12 -inkey signing_key.priv -in signing_key.x509.PEM - Import .p12 file into pesign db pk12util -i /tmp/kernel-key.p12 -d /etc/pki/pesign - Sign bzImage pesign -i /boot/vmlinuz-3.16.0-rc3+ -o /boot/vmlinuz-3.16.0-rc3+.signed.pesign -c "Glacier signing key - Magrathea" -s sbsign ====== sbsign --key signing_key.priv --cert signing_key.x509.PEM --output /boot/vmlinuz-3.16.0-rc3+.signed.sbsign /boot/vmlinuz-3.16.0-rc3+ Patch details: Well all the hard work is done in previous patches. Now bzImage loader has just call into that code and verify whether bzImage signature are valid or not. Also create two config options. First one is CONFIG_KEXEC_VERIFY_SIG. This option enforces that kernel has to be validly signed otherwise kernel load will fail. If this option is not set, no signature verification will be done. Only exception will be when secureboot is enabled. In that case signature verification should be automatically enforced when secureboot is enabled. But that will happen when secureboot patches are merged. Second config option is CONFIG_KEXEC_BZIMAGE_VERIFY_SIG. This option enables signature verification support on bzImage. If this option is not set and previous one is set, kernel image loading will fail because kernel does not have support to verify signature of bzImage. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Cc: Matt Fleming Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 22 ++++++++++++++++++++++ arch/x86/kernel/kexec-bzimage64.c | 21 +++++++++++++++++++++ arch/x86/kernel/machine_kexec_64.c | 11 +++++++++++ 3 files changed, 54 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9558b9fcafbf..4aafd322e21e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1599,6 +1599,28 @@ config KEXEC interface is strongly in flux, so no good recommendation can be made. +config KEXEC_VERIFY_SIG + bool "Verify kernel signature during kexec_file_load() syscall" + depends on KEXEC + ---help--- + This option makes kernel signature verification mandatory for + kexec_file_load() syscall. If kernel is signature can not be + verified, kexec_file_load() will fail. + + This option enforces signature verification at generic level. + One needs to enable signature verification for type of kernel + image being loaded to make sure it works. For example, enable + bzImage signature verification option to be able to load and + verify signatures of bzImage. Otherwise kernel loading will fail. + +config KEXEC_BZIMAGE_VERIFY_SIG + bool "Enable bzImage signature verification support" + depends on KEXEC_VERIFY_SIG + depends on SIGNED_PE_FILE_VERIFICATION + select SYSTEM_TRUSTED_KEYRING + ---help--- + Enable bzImage signature verification support. 
+ config CRASH_DUMP bool "kernel crash dumps" depends on X86_64 || (X86_32 && HIGHMEM) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 623e6c58081f..9642b9b33655 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -525,8 +527,27 @@ int bzImage64_cleanup(void *loader_data) return 0; } +#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG +int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) +{ + bool trusted; + int ret; + + ret = verify_pefile_signature(kernel, kernel_len, + system_trusted_keyring, &trusted); + if (ret < 0) + return ret; + if (!trusted) + return -EKEYREJECTED; + return 0; +} +#endif + struct kexec_file_ops kexec_bzImage64_ops = { .probe = bzImage64_probe, .load = bzImage64_load, .cleanup = bzImage64_cleanup, +#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG + .verify_sig = bzImage64_verify_sig, +#endif }; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9330434da777..8b04018e5d1f 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -372,6 +372,17 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) return image->fops->cleanup(image->image_loader_data); } +int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel, + unsigned long kernel_len) +{ + if (!image->fops || !image->fops->verify_sig) { + pr_debug("kernel loader does not support signature verification."); + return -EKEYREJECTED; + } + + return image->fops->verify_sig(kernel, kernel_len); +} + /* * Apply purgatory relocations. * -- cgit v1.2.2 From 8d5999df35314607c38fbd6bdd709e25c3a4eeab Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 7 Aug 2014 17:06:06 +0100 Subject: x86/xen: resume timer irqs early If the timer irqs are resumed during device resume it is possible in certain circumstances for the resume to hang early on, before device interrupts are resumed. For an Ubuntu 14.04 PVHVM guest this would occur in ~0.5% of resume attempts. It is not entirely clear what is occuring the point of the hang but I think a task necessary for the resume calls schedule_timeout(), waiting for a timer interrupt (which never arrives). This failure may require specific tasks to be running on the other VCPUs to trigger (processes are not frozen during a suspend/resume if PREEMPT is disabled). Add IRQF_EARLY_RESUME to the timer interrupts so they are resumed in syscore_resume(). 
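As a minimal illustration (hypothetical handler and names, not taken from the patch; only the IRQF_* flags come from the change above), a timer-class interrupt that must be working before ordinary device interrupts are restored could request the same resume-related flags, so that it is re-enabled from syscore_resume() rather than during device resume:

        #include <linux/interrupt.h>

        static irqreturn_t example_timer_irq(int irq, void *dev_id)
        {
                /* acknowledge and handle the timer event here */
                return IRQ_HANDLED;
        }

        static int example_setup_timer_irq(unsigned int irq)
        {
                /* IRQF_EARLY_RESUME moves re-enabling into syscore_resume() */
                return request_irq(irq, example_timer_irq,
                                   IRQF_TIMER | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME,
                                   "example-timer", NULL);
        }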
Signed-off-by: David Vrabel Reviewed-by: Boris Ostrovsky Cc: stable@vger.kernel.org --- arch/x86/xen/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 7b78f88c1707..5718b0b58b60 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -444,7 +444,7 @@ void xen_setup_timer(int cpu) irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| - IRQF_FORCE_RESUME, + IRQF_FORCE_RESUME|IRQF_EARLY_RESUME, name, NULL); (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); -- cgit v1.2.2 From 7d951f3ccb0308c95bf76d5eef9886dea35a7013 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 5 Aug 2014 11:49:19 +0100 Subject: x86/xen: use vmap() to map grant table pages in PVH guests Commit b7dd0e350e0b (x86/xen: safely map and unmap grant frames when in atomic context) causes PVH guests to crash in arch_gnttab_map_shared() when they attempted to map the pages for the grant table. This use of a PV-specific function during the PVH grant table setup is non-obvious and not needed. The standard vmap() function does the right thing. Signed-off-by: David Vrabel Reported-by: Mukesh Rathor Tested-by: Mukesh Rathor Cc: stable@vger.kernel.org --- arch/x86/xen/grant-table.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index c0413046483a..1580e7a5a4cf 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -118,6 +118,7 @@ static int __init xlated_setup_gnttab_pages(void) { struct page **pages; xen_pfn_t *pfns; + void *vaddr; int rc; unsigned int i; unsigned long nr_grant_frames = gnttab_max_grant_frames(); @@ -143,21 +144,20 @@ static int __init xlated_setup_gnttab_pages(void) for (i = 0; i < nr_grant_frames; i++) pfns[i] = page_to_pfn(pages[i]); - rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames, - &xen_auto_xlat_grant_frames.vaddr); - - if (rc) { + vaddr = vmap(pages, nr_grant_frames, 0, PAGE_KERNEL); + if (!vaddr) { pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, nr_grant_frames, rc); free_xenballooned_pages(nr_grant_frames, pages); kfree(pages); kfree(pfns); - return rc; + return -ENOMEM; } kfree(pages); xen_auto_xlat_grant_frames.pfn = pfns; xen_auto_xlat_grant_frames.count = nr_grant_frames; + xen_auto_xlat_grant_frames.vaddr = vaddr; return 0; } -- cgit v1.2.2 From 9baa3c34ac4e27f7e062f266f50cc5dbea26a6c1 Mon Sep 17 00:00:00 2001 From: Benoit Taine Date: Fri, 8 Aug 2014 15:56:03 +0200 Subject: PCI: Remove DEFINE_PCI_DEVICE_TABLE macro use We should prefer `struct pci_device_id` over `DEFINE_PCI_DEVICE_TABLE` to meet kernel coding style guidelines. This issue was reported by checkpatch. 
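For illustration, the preferred form simply declares the ID table as a const array; the vendor/device IDs below are placeholders, not from any real driver:

        #include <linux/module.h>
        #include <linux/pci.h>

        static const struct pci_device_id example_pci_ids[] = {
                { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x1234) },    /* hypothetical device */
                { 0, },                                         /* end: all zeroes */
        };
        MODULE_DEVICE_TABLE(pci, example_pci_ids);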
A simplified version of the semantic patch that makes this change is as follows (http://coccinelle.lip6.fr/): // @@ identifier i; declarer name DEFINE_PCI_DEVICE_TABLE; initializer z; @@ - DEFINE_PCI_DEVICE_TABLE(i) + const struct pci_device_id i[] = z; // [bhelgaas: add semantic patch] Signed-off-by: Benoit Taine Signed-off-by: Bjorn Helgaas --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 10 +++++----- arch/x86/kernel/iosf_mbi.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index cfc6f9dfcd90..0939f86f543d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -945,7 +945,7 @@ static struct intel_uncore_type *snbep_pci_uncores[] = { NULL, }; -static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = { +static const struct pci_device_id snbep_uncore_pci_ids[] = { { /* Home Agent */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0), @@ -1510,7 +1510,7 @@ static struct intel_uncore_type *ivt_pci_uncores[] = { NULL, }; -static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { +static const struct pci_device_id ivt_uncore_pci_ids[] = { { /* Home Agent 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30), .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 0), @@ -1985,7 +1985,7 @@ static struct intel_uncore_type *snb_pci_uncores[] = { NULL, }; -static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = { +static const struct pci_device_id snb_uncore_pci_ids[] = { { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), @@ -1993,7 +1993,7 @@ static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = { { /* end: all zeroes */ }, }; -static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = { +static const struct pci_device_id ivb_uncore_pci_ids[] = { { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), @@ -2001,7 +2001,7 @@ static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = { { /* end: all zeroes */ }, }; -static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = { +static const struct pci_device_id hsw_uncore_pci_ids[] = { { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c index d30acdc1229d..9030e83db6ee 100644 --- a/arch/x86/kernel/iosf_mbi.c +++ b/arch/x86/kernel/iosf_mbi.c @@ -202,7 +202,7 @@ static int iosf_mbi_probe(struct pci_dev *pdev, return 0; } -static DEFINE_PCI_DEVICE_TABLE(iosf_mbi_pci_ids) = { +static const struct pci_device_id iosf_mbi_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BAYTRAIL) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_QUARK_X1000) }, { 0, }, -- cgit v1.2.2 From 8c058d53f6f2eb0b9cd3bc4ce5c053a64dded671 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 31 Jul 2014 15:21:24 -0400 Subject: intel_idle: Disable Baytrail Core and Module C6 auto-demotion Power efficiency improves on Baytrail (Intel Atom Processor E3000) when Linux disables C6 auto-demotion. Based on work by Srinidhi Kasagar . 
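The patch below only adds the MSR numbers; the code that uses them is not part of this change. A plausible consumer might clear both demotion policy MSRs on affected CPUs along the following lines (sketch only: the function name is hypothetical, and the assumption that writing zero disables auto-demotion is drawn from the commit message, not from code shown here):

        #include <asm/msr.h>

        static void example_byt_auto_demotion_disable(void)
        {
                /* assumed: clearing both policy MSRs turns off core/module C6 auto-demotion */
                wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
                wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
        }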
Signed-off-by: Len Brown Cc: x86@kernel.org --- arch/x86/include/uapi/asm/msr-index.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index fcf2b3ae1bf0..4a7aecd2ea13 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -149,6 +149,9 @@ #define MSR_CORE_C1_RES 0x00000660 +#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 +#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 + #define MSR_AMD64_MC0_MASK 0xc0010044 #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) -- cgit v1.2.2