diff options
| -rw-r--r-- | arch/x86/include/asm/current.h | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/percpu.h | 26 | ||||
| -rw-r--r-- | arch/x86/include/asm/thread_info.h | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/common.c | 18 | ||||
| -rw-r--r-- | arch/x86/kernel/process_32.c | 3 | ||||
| -rw-r--r-- | arch/x86/kernel/process_64.c | 3 | ||||
| -rw-r--r-- | include/linux/percpu-defs.h | 8 |
7 files changed, 40 insertions, 22 deletions
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index c68c361697e1..4d447b732d82 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h | |||
| @@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task); | |||
| 11 | 11 | ||
| 12 | static __always_inline struct task_struct *get_current(void) | 12 | static __always_inline struct task_struct *get_current(void) |
| 13 | { | 13 | { |
| 14 | return percpu_read(current_task); | 14 | return percpu_read_stable(current_task); |
| 15 | } | 15 | } |
| 16 | 16 | ||
| 17 | #define current get_current() | 17 | #define current get_current() |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 103f1ddb0d85..04eacefcfd26 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
| @@ -49,7 +49,7 @@ | |||
| 49 | #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x | 49 | #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x |
| 50 | #define __my_cpu_offset percpu_read(this_cpu_off) | 50 | #define __my_cpu_offset percpu_read(this_cpu_off) |
| 51 | #else | 51 | #else |
| 52 | #define __percpu_arg(x) "%" #x | 52 | #define __percpu_arg(x) "%P" #x |
| 53 | #endif | 53 | #endif |
| 54 | 54 | ||
| 55 | /* | 55 | /* |
| @@ -104,36 +104,48 @@ do { \ | |||
| 104 | } \ | 104 | } \ |
| 105 | } while (0) | 105 | } while (0) |
| 106 | 106 | ||
| 107 | #define percpu_from_op(op, var) \ | 107 | #define percpu_from_op(op, var, constraint) \ |
| 108 | ({ \ | 108 | ({ \ |
| 109 | typeof(var) ret__; \ | 109 | typeof(var) ret__; \ |
| 110 | switch (sizeof(var)) { \ | 110 | switch (sizeof(var)) { \ |
| 111 | case 1: \ | 111 | case 1: \ |
| 112 | asm(op "b "__percpu_arg(1)",%0" \ | 112 | asm(op "b "__percpu_arg(1)",%0" \ |
| 113 | : "=q" (ret__) \ | 113 | : "=q" (ret__) \ |
| 114 | : "m" (var)); \ | 114 | : constraint); \ |
| 115 | break; \ | 115 | break; \ |
| 116 | case 2: \ | 116 | case 2: \ |
| 117 | asm(op "w "__percpu_arg(1)",%0" \ | 117 | asm(op "w "__percpu_arg(1)",%0" \ |
| 118 | : "=r" (ret__) \ | 118 | : "=r" (ret__) \ |
| 119 | : "m" (var)); \ | 119 | : constraint); \ |
| 120 | break; \ | 120 | break; \ |
| 121 | case 4: \ | 121 | case 4: \ |
| 122 | asm(op "l "__percpu_arg(1)",%0" \ | 122 | asm(op "l "__percpu_arg(1)",%0" \ |
| 123 | : "=r" (ret__) \ | 123 | : "=r" (ret__) \ |
| 124 | : "m" (var)); \ | 124 | : constraint); \ |
| 125 | break; \ | 125 | break; \ |
| 126 | case 8: \ | 126 | case 8: \ |
| 127 | asm(op "q "__percpu_arg(1)",%0" \ | 127 | asm(op "q "__percpu_arg(1)",%0" \ |
| 128 | : "=r" (ret__) \ | 128 | : "=r" (ret__) \ |
| 129 | : "m" (var)); \ | 129 | : constraint); \ |
| 130 | break; \ | 130 | break; \ |
| 131 | default: __bad_percpu_size(); \ | 131 | default: __bad_percpu_size(); \ |
| 132 | } \ | 132 | } \ |
| 133 | ret__; \ | 133 | ret__; \ |
| 134 | }) | 134 | }) |
| 135 | 135 | ||
| 136 | #define percpu_read(var) percpu_from_op("mov", per_cpu__##var) | 136 | /* |
| 137 | * percpu_read() makes gcc load the percpu variable every time it is | ||
| 138 | * accessed while percpu_read_stable() allows the value to be cached. | ||
| 139 | * percpu_read_stable() is more efficient and can be used if its value | ||
| 140 | * is guaranteed to be valid across cpus. The current users include | ||
| 141 | * get_current() and current_thread_info() both of which are actually | ||
| 142 | * per-thread variables implemented as per-cpu variables and thus | ||
| 143 | * stable for the duration of the respective task. | ||
| 144 | */ | ||
| 145 | #define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \ | ||
| 146 | "m" (per_cpu__##var)) | ||
| 147 | #define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \ | ||
| 148 | "p" (&per_cpu__##var)) | ||
| 137 | #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) | 149 | #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) |
| 138 | #define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) | 150 | #define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) |
| 139 | #define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) | 151 | #define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 6f7786aea4fc..d27d0a2fec4c 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
| @@ -214,7 +214,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack); | |||
| 214 | static inline struct thread_info *current_thread_info(void) | 214 | static inline struct thread_info *current_thread_info(void) |
| 215 | { | 215 | { |
| 216 | struct thread_info *ti; | 216 | struct thread_info *ti; |
| 217 | ti = (void *)(percpu_read(kernel_stack) + | 217 | ti = (void *)(percpu_read_stable(kernel_stack) + |
| 218 | KERNEL_STACK_OFFSET - THREAD_SIZE); | 218 | KERNEL_STACK_OFFSET - THREAD_SIZE); |
| 219 | return ti; | 219 | return ti; |
| 220 | } | 220 | } |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 55a6abe40394..2055fc2b2e6b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -987,13 +987,21 @@ struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; | |||
| 987 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 987 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
| 988 | irq_stack_union) __aligned(PAGE_SIZE); | 988 | irq_stack_union) __aligned(PAGE_SIZE); |
| 989 | 989 | ||
| 990 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | 990 | /* |
| 991 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | 991 | * The following four percpu variables are hot. Align current_task to |
| 992 | * cacheline size such that all four fall in the same cacheline. | ||
| 993 | */ | ||
| 994 | DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = | ||
| 995 | &init_task; | ||
| 996 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
| 992 | 997 | ||
| 993 | DEFINE_PER_CPU(unsigned long, kernel_stack) = | 998 | DEFINE_PER_CPU(unsigned long, kernel_stack) = |
| 994 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; | 999 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; |
| 995 | EXPORT_PER_CPU_SYMBOL(kernel_stack); | 1000 | EXPORT_PER_CPU_SYMBOL(kernel_stack); |
| 996 | 1001 | ||
| 1002 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | ||
| 1003 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | ||
| 1004 | |||
| 997 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; | 1005 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; |
| 998 | 1006 | ||
| 999 | /* | 1007 | /* |
| @@ -1008,8 +1016,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { | |||
| 1008 | }; | 1016 | }; |
| 1009 | 1017 | ||
| 1010 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | 1018 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks |
| 1011 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) | 1019 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); |
| 1012 | __aligned(PAGE_SIZE); | ||
| 1013 | 1020 | ||
| 1014 | /* May not be marked __init: used by software suspend */ | 1021 | /* May not be marked __init: used by software suspend */ |
| 1015 | void syscall_init(void) | 1022 | void syscall_init(void) |
| @@ -1042,6 +1049,9 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist); | |||
| 1042 | 1049 | ||
| 1043 | #else /* CONFIG_X86_64 */ | 1050 | #else /* CONFIG_X86_64 */ |
| 1044 | 1051 | ||
| 1052 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
| 1053 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
| 1054 | |||
| 1045 | #ifdef CONFIG_CC_STACKPROTECTOR | 1055 | #ifdef CONFIG_CC_STACKPROTECTOR |
| 1046 | DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); | 1056 | DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); |
| 1047 | #endif | 1057 | #endif |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a80eddd41658..4cf79567cdab 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -61,9 +61,6 @@ | |||
| 61 | 61 | ||
| 62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
| 63 | 63 | ||
| 64 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
| 65 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
| 66 | |||
| 67 | /* | 64 | /* |
| 68 | * Return saved PC of a blocked thread. | 65 | * Return saved PC of a blocked thread. |
| 69 | */ | 66 | */ |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a28279dbb07c..ad535b683170 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -55,9 +55,6 @@ | |||
| 55 | 55 | ||
| 56 | asmlinkage extern void ret_from_fork(void); | 56 | asmlinkage extern void ret_from_fork(void); |
| 57 | 57 | ||
| 58 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
| 59 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
| 60 | |||
| 61 | DEFINE_PER_CPU(unsigned long, old_rsp); | 58 | DEFINE_PER_CPU(unsigned long, old_rsp); |
| 62 | static DEFINE_PER_CPU(unsigned char, is_idle); | 59 | static DEFINE_PER_CPU(unsigned char, is_idle); |
| 63 | 60 | ||
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 3058cf9dd3d4..0761491b3eec 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h | |||
| @@ -77,11 +77,13 @@ | |||
| 77 | /* | 77 | /* |
| 78 | * Declaration/definition used for per-CPU variables that must be page aligned. | 78 | * Declaration/definition used for per-CPU variables that must be page aligned. |
| 79 | */ | 79 | */ |
| 80 | #define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ | 80 | #define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ |
| 81 | DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") | 81 | DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") \ |
| 82 | __aligned(PAGE_SIZE) | ||
| 82 | 83 | ||
| 83 | #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ | 84 | #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ |
| 84 | DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") | 85 | DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") \ |
| 86 | __aligned(PAGE_SIZE) | ||
| 85 | 87 | ||
| 86 | /* | 88 | /* |
| 87 | * Intermodule exports for per-CPU variables. | 89 | * Intermodule exports for per-CPU variables. |
