 64 files changed, 1978 insertions(+), 740 deletions(-)
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
index 2428cc04dbc8..f3abca7ec53d 100644
--- a/Documentation/DocBook/kgdb.tmpl
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -197,6 +197,7 @@
 may be configured as a kernel built-in or a kernel loadable module.
 You can only make use of <constant>kgdbwait</constant> and early
 debugging if you build kgdboc into the kernel as a built-in.
+</para>
 <para>Optionally you can elect to activate kms (Kernel Mode
 Setting) integration. When you use kms with kgdboc and you have a
 video driver that has atomic mode setting hooks, it is possible to
@@ -206,7 +207,6 @@
 crashes or doing analysis of memory with kdb while allowing the
 full graphics console applications to run.
 </para>
-</para>
 <sect2 id="kgdbocArgs">
 <title>kgdboc arguments</title>
 <para>Usage: <constant>kgdboc=[kms][[,]kbd][[,]serial_device][,baud]</constant></para>
@@ -284,7 +284,6 @@
 </listitem>
 </orderedlist>
 </para>
-</sect3>
 <para>NOTE: Kgdboc does not support interrupting the target via the
 gdb remote protocol. You must manually send a sysrq-g unless you
 have a proxy that splits console output to a terminal program.
@@ -305,6 +304,7 @@
 as well as on the initial connect, or to use a debugger proxy that
 allows an unmodified gdb to do the debugging.
 </para>
+</sect3>
 </sect2>
 </sect1>
 <sect1 id="kgdbwait">
@@ -350,12 +350,12 @@
 </para>
 </listitem>
 </orderedlist>
+</para>
 <para>IMPORTANT NOTE: You cannot use kgdboc + kgdbcon on a tty that is an
 active system console. An example of incorrect usage is <constant>console=ttyS0,115200 kgdboc=ttyS0 kgdbcon</constant>
 </para>
 <para>It is possible to use this option with kgdboc on a tty that is not a system console.
 </para>
-</para>
 </sect1>
 <sect1 id="kgdbreboot">
 <title>Run time parameter: kgdbreboot</title>
diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt
index 199f453cb4de..82fbdbc1e0b0 100644
--- a/Documentation/x86/zero-page.txt
+++ b/Documentation/x86/zero-page.txt
@@ -3,7 +3,7 @@ protocol of kernel. These should be filled by bootloader or 16-bit
 real-mode setup code of the kernel. References/settings to it mainly
 are in:
 
- arch/x86/include/asm/bootparam.h
+ arch/x86/include/uapi/asm/bootparam.h
 
 
 Offset	Proto	Name		Meaning
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index c4fbb9527c5c..b1453a2ae1ca 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -18,15 +18,15 @@ struct cpu_topology_s390 {
 	cpumask_t book_mask;
 };
 
-extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
+DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
 
-#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
-#define topology_thread_id(cpu)		(cpu_topology[cpu].thread_id)
-#define topology_thread_cpumask(cpu)	(&cpu_topology[cpu].thread_mask)
-#define topology_core_id(cpu)		(cpu_topology[cpu].core_id)
-#define topology_core_cpumask(cpu)	(&cpu_topology[cpu].core_mask)
-#define topology_book_id(cpu)		(cpu_topology[cpu].book_id)
-#define topology_book_cpumask(cpu)	(&cpu_topology[cpu].book_mask)
+#define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id)
+#define topology_thread_id(cpu)		(per_cpu(cpu_topology, cpu).thread_id)
+#define topology_thread_cpumask(cpu)	(&per_cpu(cpu_topology, cpu).thread_mask)
+#define topology_core_id(cpu)		(per_cpu(cpu_topology, cpu).core_id)
+#define topology_core_cpumask(cpu)	(&per_cpu(cpu_topology, cpu).core_mask)
+#define topology_book_id(cpu)		(per_cpu(cpu_topology, cpu).book_id)
+#define topology_book_cpumask(cpu)	(&per_cpu(cpu_topology, cpu).book_mask)
 
 #define mc_capable() 1
 
@@ -51,14 +51,6 @@ static inline void topology_expect_change(void) { }
 #define POLARIZATION_VM		(2)
 #define POLARIZATION_VH		(3)
 
-#ifdef CONFIG_SCHED_BOOK
-void s390_init_cpu_topology(void);
-#else
-static inline void s390_init_cpu_topology(void)
-{
-};
-#endif
-
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_S390_TOPOLOGY_H */
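
Note: the change above converts the s390 topology data from an NR_CPUS-sized array to per-CPU data, so every accessor switches from cpu_topology[cpu] to per_cpu(cpu_topology, cpu). A minimal sketch of the generic per-CPU pattern involved (the example_* names are illustrative, not from this patch):

	#include <linux/percpu.h>

	struct example_info {
		unsigned short socket_id;
	};

	/* In a header, visible to all users: */
	DECLARE_PER_CPU(struct example_info, example_info);

	/* In exactly one .c file: */
	DEFINE_PER_CPU(struct example_info, example_info);

	/* Array indexing becomes a per-CPU lookup: */
	static unsigned short example_socket_id(int cpu)
	{
		return per_cpu(example_info, cpu).socket_id;
	}

Besides dropping the static NR_CPUS sizing, per-CPU placement keeps each CPU's own entry in memory that is local to the most common (self) lookup.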
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 632fa06ea162..0969d113b3d6 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -91,12 +91,9 @@ static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
 {
 	if (level >= CACHE_MAX_LEVEL)
 		return CACHE_TYPE_NOCACHE;
-
 	ci += level;
-
 	if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE)
 		return CACHE_TYPE_NOCACHE;
-
 	return cache_type_map[ci->type];
 }
 
@@ -111,23 +108,19 @@ static inline unsigned long ecag(int ai, int li, int ti)
 }
 
 static void ci_leaf_init(struct cacheinfo *this_leaf, int private,
-			 enum cache_type type, unsigned int level)
+			 enum cache_type type, unsigned int level, int cpu)
 {
 	int ti, num_sets;
-	int cpu = smp_processor_id();
 
 	if (type == CACHE_TYPE_INST)
 		ti = CACHE_TI_INSTRUCTION;
 	else
 		ti = CACHE_TI_UNIFIED;
-
 	this_leaf->level = level + 1;
 	this_leaf->type = type;
 	this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
-	this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY,
-						level, ti);
+	this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
 	this_leaf->size = ecag(EXTRACT_SIZE, level, ti);
-
 	num_sets = this_leaf->size / this_leaf->coherency_line_size;
 	num_sets /= this_leaf->ways_of_associativity;
 	this_leaf->number_of_sets = num_sets;
@@ -145,7 +138,6 @@ int init_cache_level(unsigned int cpu)
 
 	if (!this_cpu_ci)
 		return -EINVAL;
-
 	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
 	do {
 		ctype = get_cache_type(&ct.ci[0], level);
@@ -154,34 +146,31 @@ int init_cache_level(unsigned int cpu)
 		/* Separate instruction and data caches */
 		leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
 	} while (++level < CACHE_MAX_LEVEL);
-
 	this_cpu_ci->num_levels = level;
 	this_cpu_ci->num_leaves = leaves;
-
 	return 0;
 }
 
 int populate_cache_leaves(unsigned int cpu)
 {
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
 	unsigned int level, idx, pvt;
 	union cache_topology ct;
 	enum cache_type ctype;
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
 
 	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
 	for (idx = 0, level = 0; level < this_cpu_ci->num_levels &&
 	     idx < this_cpu_ci->num_leaves; idx++, level++) {
 		if (!this_leaf)
 			return -EINVAL;
-
 		pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0;
 		ctype = get_cache_type(&ct.ci[0], level);
 		if (ctype == CACHE_TYPE_SEPARATE) {
-			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level);
-			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level);
+			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level, cpu);
+			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level, cpu);
 		} else {
-			ci_leaf_init(this_leaf++, pvt, ctype, level);
+			ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
 		}
 	}
 	return 0;
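
Note: besides compacting blank lines, the hunk above makes ci_leaf_init() take the target cpu from populate_cache_leaves() instead of calling smp_processor_id(), so cache leaves can be described for a CPU other than the executing one. The set calculation at the end of ci_leaf_init() is plain arithmetic; a self-contained worked example (values illustrative, not read from ECAG):

	#include <stdio.h>

	int main(void)
	{
		unsigned int size = 256 * 1024;	/* 256 KiB cache */
		unsigned int line_size = 256;	/* bytes per cache line */
		unsigned int ways = 8;		/* 8-way set associative */
		unsigned int num_sets = size / line_size / ways;

		printf("%u sets\n", num_sets);	/* prints: 128 sets */
		return 0;
	}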
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 70a329450901..4427ab7ac23a 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -393,17 +393,19 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
 	if (test_facility(129))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
-	if (test_facility(128))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
 #endif
 }
 
-static int __init nocad_setup(char *str)
+static int __init cad_setup(char *str)
 {
-	S390_lowcore.machine_flags &= ~MACHINE_FLAG_CAD;
+	int val;
+
+	get_option(&str, &val);
+	if (val && test_facility(128))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
 	return 0;
 }
-early_param("nocad", nocad_setup);
+early_param("cad", cad_setup);
 
 static int __init cad_init(void)
 {
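
Note: the hunk above inverts the command-line semantics: the unconditional facility-128 probe plus a "nocad" opt-out becomes an explicit "cad=<n>" opt-in that still checks the facility. A minimal sketch of the early_param/get_option pattern it uses ("example" is a made-up parameter name):

	#include <linux/init.h>
	#include <linux/kernel.h>

	static bool example_enabled;

	static int __init example_setup(char *str)
	{
		int val;

		get_option(&str, &val);	/* "example=1" yields val == 1 */
		if (val)
			example_enabled = true;
		return 0;
	}
	early_param("example", example_setup);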
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index bfac77ada4f2..a5ea8bc17cb3 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -909,7 +909,6 @@ void __init setup_arch(char **cmdline_p)
 	setup_lowcore();
 	smp_fill_possible_mask();
 	cpu_init();
-	s390_init_cpu_topology();
 
 	/*
 	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a668993ff577..db8f1115a3bf 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -59,14 +59,13 @@
 	CPU_STATE_CONFIGURED,
 };
 
+static DEFINE_PER_CPU(struct cpu *, cpu_device);
+
 struct pcpu {
-	struct cpu *cpu;
 	struct _lowcore *lowcore;	/* lowcore page(s) for the cpu */
-	unsigned long async_stack;	/* async stack for the cpu */
-	unsigned long panic_stack;	/* panic stack for the cpu */
 	unsigned long ec_mask;		/* bit mask for ec_xxx functions */
-	int state;			/* physical cpu state */
-	int polarization;		/* physical polarization */
+	signed char state;		/* physical cpu state */
+	signed char polarization;	/* physical polarization */
 	u16 address;			/* physical cpu address */
 };
 
@@ -173,25 +172,30 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
 	pcpu_sigp_retry(pcpu, order, 0);
 }
 
+#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+
 static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 {
+	unsigned long async_stack, panic_stack;
 	struct _lowcore *lc;
 
 	if (pcpu != &pcpu_devices[0]) {
 		pcpu->lowcore = (struct _lowcore *)
 			__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
-		pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
-		pcpu->panic_stack = __get_free_page(GFP_KERNEL);
-		if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack)
+		async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+		panic_stack = __get_free_page(GFP_KERNEL);
+		if (!pcpu->lowcore || !panic_stack || !async_stack)
 			goto out;
+	} else {
+		async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
+		panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
 	}
 	lc = pcpu->lowcore;
 	memcpy(lc, &S390_lowcore, 512);
 	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
-	lc->async_stack = pcpu->async_stack + ASYNC_SIZE
-		- STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
-	lc->panic_stack = pcpu->panic_stack + PAGE_SIZE
-		- STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+	lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
+	lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
 	lc->cpu_nr = cpu;
 	lc->spinlock_lockval = arch_spin_lockval(cpu);
 #ifndef CONFIG_64BIT
@@ -212,8 +216,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	return 0;
 out:
 	if (pcpu != &pcpu_devices[0]) {
-		free_page(pcpu->panic_stack);
-		free_pages(pcpu->async_stack, ASYNC_ORDER);
+		free_page(panic_stack);
+		free_pages(async_stack, ASYNC_ORDER);
 		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
 	}
 	return -ENOMEM;
@@ -235,11 +239,11 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
 #else
 	vdso_free_per_cpu(pcpu->lowcore);
 #endif
-	if (pcpu != &pcpu_devices[0]) {
-		free_page(pcpu->panic_stack);
-		free_pages(pcpu->async_stack, ASYNC_ORDER);
-		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
-	}
+	if (pcpu == &pcpu_devices[0])
+		return;
+	free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
+	free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
+	free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
 }
 
 #endif /* CONFIG_HOTPLUG_CPU */
@@ -366,7 +370,8 @@ void smp_call_online_cpu(void (*func)(void *), void *data)
 void smp_call_ipl_cpu(void (*func)(void *), void *data)
 {
 	pcpu_delegate(&pcpu_devices[0], func, data,
-		      pcpu_devices->panic_stack + PAGE_SIZE);
+		      pcpu_devices->lowcore->panic_stack -
+		      PANIC_FRAME_OFFSET + PAGE_SIZE);
 }
 
 int smp_find_processor_id(u16 address)
@@ -935,10 +940,6 @@ void __init smp_prepare_boot_cpu(void)
 	pcpu->state = CPU_STATE_CONFIGURED;
 	pcpu->address = stap();
 	pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
-	pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE
-		+ STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-	pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE
-		+ STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
 	S390_lowcore.percpu_offset = __per_cpu_offset[0];
 	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
 	set_cpu_present(0, true);
@@ -1078,8 +1079,7 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
 			  void *hcpu)
 {
 	unsigned int cpu = (unsigned int)(long)hcpu;
-	struct cpu *c = pcpu_devices[cpu].cpu;
-	struct device *s = &c->dev;
+	struct device *s = &per_cpu(cpu_device, cpu)->dev;
 	int err = 0;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
@@ -1102,7 +1102,7 @@ static int smp_add_present_cpu(int cpu)
 	c = kzalloc(sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return -ENOMEM;
-	pcpu_devices[cpu].cpu = c;
+	per_cpu(cpu_device, cpu) = c;
 	s = &c->dev;
 	c->hotpluggable = 1;
 	rc = register_cpu(c, cpu);
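
Note: with ASYNC_FRAME_OFFSET/PANIC_FRAME_OFFSET, struct pcpu no longer caches the stack base addresses: the lowcore stores base + offset, and the base is recovered by subtracting the same constant, as pcpu_free_lowcore() now does. A tiny sketch of that invariant (addresses illustrative):

	/* what pcpu_alloc_lowcore() stores */
	unsigned long base = 0x100000UL;	/* from __get_free_pages() */
	unsigned long entry = base + ASYNC_FRAME_OFFSET;	/* -> lowcore */

	/* what pcpu_free_lowcore() recovers */
	unsigned long recovered = entry - ASYNC_FRAME_OFFSET;	/* == base */

Together with shrinking state and polarization to signed char, this trims the per-entry size of the pcpu_devices array.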
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 24ee33f1af24..14da43b801d9 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -7,14 +7,14 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/workqueue.h>
-#include <linux/bootmem.h>
 #include <linux/cpuset.h>
 #include <linux/device.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
@@ -42,8 +42,8 @@ static DEFINE_SPINLOCK(topology_lock);
 static struct mask_info socket_info;
 static struct mask_info book_info;
 
-struct cpu_topology_s390 cpu_topology[NR_CPUS];
-EXPORT_SYMBOL_GPL(cpu_topology);
+DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
 
 static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
 {
@@ -90,15 +90,15 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
 		if (lcpu < 0)
 			continue;
 		for (i = 0; i <= smp_cpu_mtid; i++) {
-			cpu_topology[lcpu + i].book_id = book->id;
-			cpu_topology[lcpu + i].core_id = rcore;
-			cpu_topology[lcpu + i].thread_id = lcpu + i;
+			per_cpu(cpu_topology, lcpu + i).book_id = book->id;
+			per_cpu(cpu_topology, lcpu + i).core_id = rcore;
+			per_cpu(cpu_topology, lcpu + i).thread_id = lcpu + i;
 			cpumask_set_cpu(lcpu + i, &book->mask);
 			cpumask_set_cpu(lcpu + i, &socket->mask);
 			if (one_socket_per_cpu)
-				cpu_topology[lcpu + i].socket_id = rcore;
+				per_cpu(cpu_topology, lcpu + i).socket_id = rcore;
 			else
-				cpu_topology[lcpu + i].socket_id = socket->id;
+				per_cpu(cpu_topology, lcpu + i).socket_id = socket->id;
 			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
 		}
 		if (one_socket_per_cpu)
@@ -249,14 +249,14 @@ static void update_cpu_masks(void)
 
 	spin_lock_irqsave(&topology_lock, flags);
 	for_each_possible_cpu(cpu) {
-		cpu_topology[cpu].thread_mask = cpu_thread_map(cpu);
-		cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu);
-		cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu);
+		per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
+		per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
+		per_cpu(cpu_topology, cpu).book_mask = cpu_group_map(&book_info, cpu);
 		if (!MACHINE_HAS_TOPOLOGY) {
-			cpu_topology[cpu].thread_id = cpu;
-			cpu_topology[cpu].core_id = cpu;
-			cpu_topology[cpu].socket_id = cpu;
-			cpu_topology[cpu].book_id = cpu;
+			per_cpu(cpu_topology, cpu).thread_id = cpu;
+			per_cpu(cpu_topology, cpu).core_id = cpu;
+			per_cpu(cpu_topology, cpu).socket_id = cpu;
+			per_cpu(cpu_topology, cpu).book_id = cpu;
 		}
 	}
 	spin_unlock_irqrestore(&topology_lock, flags);
@@ -334,50 +334,6 @@ void topology_expect_change(void)
 	set_topology_timer();
 }
 
-static int __init early_parse_topology(char *p)
-{
-	if (strncmp(p, "off", 3))
-		return 0;
-	topology_enabled = 0;
-	return 0;
-}
-early_param("topology", early_parse_topology);
-
-static void __init alloc_masks(struct sysinfo_15_1_x *info,
-			       struct mask_info *mask, int offset)
-{
-	int i, nr_masks;
-
-	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
-	for (i = 0; i < info->mnest - offset; i++)
-		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
-	nr_masks = max(nr_masks, 1);
-	for (i = 0; i < nr_masks; i++) {
-		mask->next = alloc_bootmem_align(
-			roundup_pow_of_two(sizeof(struct mask_info)),
-			roundup_pow_of_two(sizeof(struct mask_info)));
-		mask = mask->next;
-	}
-}
-
-void __init s390_init_cpu_topology(void)
-{
-	struct sysinfo_15_1_x *info;
-	int i;
-
-	if (!MACHINE_HAS_TOPOLOGY)
-		return;
-	tl_info = alloc_bootmem_pages(PAGE_SIZE);
-	info = tl_info;
-	store_topology(info);
-	pr_info("The CPU configuration topology of the machine is:");
-	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
-		printk(KERN_CONT " %d", info->mag[i]);
-	printk(KERN_CONT " / %d\n", info->mnest);
-	alloc_masks(info, &socket_info, 1);
-	alloc_masks(info, &book_info, 2);
-}
-
 static int cpu_management;
 
 static ssize_t dispatching_show(struct device *dev,
@@ -467,20 +423,29 @@ int topology_cpu_init(struct cpu *cpu)
 
 const struct cpumask *cpu_thread_mask(int cpu)
 {
-	return &cpu_topology[cpu].thread_mask;
+	return &per_cpu(cpu_topology, cpu).thread_mask;
 }
 
 
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
-	return &cpu_topology[cpu].core_mask;
+	return &per_cpu(cpu_topology, cpu).core_mask;
 }
 
 static const struct cpumask *cpu_book_mask(int cpu)
 {
-	return &cpu_topology[cpu].book_mask;
+	return &per_cpu(cpu_topology, cpu).book_mask;
 }
 
+static int __init early_parse_topology(char *p)
+{
+	if (strncmp(p, "off", 3))
+		return 0;
+	topology_enabled = 0;
+	return 0;
+}
+early_param("topology", early_parse_topology);
+
 static struct sched_domain_topology_level s390_topology[] = {
 	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
@@ -489,6 +454,42 @@ static struct sched_domain_topology_level s390_topology[] = {
 	{ NULL, },
 };
 
+static void __init alloc_masks(struct sysinfo_15_1_x *info,
+			       struct mask_info *mask, int offset)
+{
+	int i, nr_masks;
+
+	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
+	for (i = 0; i < info->mnest - offset; i++)
+		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
+	nr_masks = max(nr_masks, 1);
+	for (i = 0; i < nr_masks; i++) {
+		mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL);
+		mask = mask->next;
+	}
+}
+
+static int __init s390_topology_init(void)
+{
+	struct sysinfo_15_1_x *info;
+	int i;
+
+	if (!MACHINE_HAS_TOPOLOGY)
+		return 0;
+	tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL);
+	info = tl_info;
+	store_topology(info);
+	pr_info("The CPU configuration topology of the machine is:");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		printk(KERN_CONT " %d", info->mag[i]);
+	printk(KERN_CONT " / %d\n", info->mnest);
+	alloc_masks(info, &socket_info, 1);
+	alloc_masks(info, &book_info, 2);
+	set_sched_topology(s390_topology);
+	return 0;
+}
+early_initcall(s390_topology_init);
+
 static int __init topology_init(void)
 {
 	if (MACHINE_HAS_TOPOLOGY)
@@ -498,10 +499,3 @@ static int __init topology_init(void)
 	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
 }
 device_initcall(topology_init);
-
-static int __init early_topology_init(void)
-{
-	set_sched_topology(s390_topology);
-	return 0;
-}
-early_initcall(early_topology_init);
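
Note: the boot path changes shape here: s390_init_cpu_topology(), which ran from setup_arch() and used the bootmem allocator, becomes s390_topology_init(), an early_initcall that can use kzalloc()/__get_free_page() because the slab allocator is up by the time initcalls run; the separate early_topology_init() that only called set_sched_topology() is folded into it. A minimal sketch of the pattern (example_init is a made-up name):

	#include <linux/init.h>
	#include <linux/slab.h>

	static int __init example_init(void)
	{
		void *buf = kzalloc(64, GFP_KERNEL);	/* slab works here */

		if (!buf)
			return -ENOMEM;
		/* ... probe hardware, register topology ... */
		return 0;
	}
	early_initcall(example_init);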
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 7699e735ae28..61541fb93dc6 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -25,9 +25,7 @@ __kernel_clock_gettime:
 	je	4f
 	cghi	%r2,__CLOCK_REALTIME
 	je	5f
-	cghi	%r2,__CLOCK_THREAD_CPUTIME_ID
-	je	9f
-	cghi	%r2,-2		/* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+	cghi	%r2,-3		/* Per-thread CPUCLOCK with PID=0, VIRT=1 */
 	je	9f
 	cghi	%r2,__CLOCK_MONOTONIC_COARSE
 	je	3f
@@ -106,7 +104,7 @@ __kernel_clock_gettime:
 	aghi	%r15,16
 	br	%r14
 
-	/* CLOCK_THREAD_CPUTIME_ID for this thread */
+	/* CPUCLOCK_VIRT for this thread */
 9:	icm	%r0,15,__VDSO_ECTG_OK(%r5)
 	jz	12f
 	ear	%r2,%a4
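
Note: the constant -3 is a clockid in the generic posix-cpu-timers encoding (include/linux/posix-timers.h): the PID lands in the upper bits via bitwise NOT and a 3-bit shift, bit 2 marks a per-thread clock, and the low two bits select the clock type. For "calling thread, CPUCLOCK_VIRT" that works out to -3, the only case this ECTG fast path serves; the removed comparisons (CLOCK_THREAD_CPUTIME_ID and -2) both name the thread's CPUCLOCK_SCHED clock and now fall through to the system call. A worked example of the encoding:

	#include <stdio.h>

	int main(void)
	{
		int pid = 0;		/* 0 means "the calling thread" */
		int perthread = 4;	/* CPUCLOCK_PERTHREAD_MASK */
		int virt = 1;		/* CPUCLOCK_VIRT */
		int clockid = (~pid << 3) | perthread | virt;

		printf("%d\n", clockid);	/* prints -3 */
		return 0;
	}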
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index d008f638b2cd..179a2c20b01f 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -183,7 +183,10 @@ unsigned long randomize_et_dyn(void)
 {
 	unsigned long base;
 
-	base = (STACK_TOP / 3 * 2) & (~mmap_align_mask << PAGE_SHIFT);
+	base = STACK_TOP / 3 * 2;
+	if (!is_32bit_task())
+		/* Align to 4GB */
+		base &= ~((1UL << 32) - 1);
 	return base + mmap_rnd();
 }
 
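
Note: the rewrite above replaces the mmap_align_mask-based rounding with an explicit rule: for 64-bit tasks the ELF load base is aligned down to a 4 GiB boundary before the random offset is added. The mask arithmetic as a self-contained example (the base value is illustrative, not the real STACK_TOP derivation):

	#include <stdio.h>

	int main(void)
	{
		unsigned long base = 0x2aa12345678UL;	/* illustrative */

		base &= ~((1UL << 32) - 1);	/* clear low 32 bits */
		printf("%#lx\n", base);		/* prints 0x2aa00000000 */
		return 0;
	}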
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index eb1cf898ed3c..c2fb8a87dccb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -488,6 +488,22 @@ config X86_INTEL_MID
 	  Intel MID platforms are based on an Intel processor and chipset which
 	  consume less power than most of the x86 derivatives.
 
+config X86_INTEL_QUARK
+	bool "Intel Quark platform support"
+	depends on X86_32
+	depends on X86_EXTENDED_PLATFORM
+	depends on X86_PLATFORM_DEVICES
+	depends on X86_TSC
+	depends on PCI
+	depends on PCI_GOANY
+	depends on X86_IO_APIC
+	select IOSF_MBI
+	select INTEL_IMR
+	---help---
+	  Select to include support for Quark X1000 SoC.
+	  Say Y here if you have a Quark based system such as the Arduino
+	  compatible Intel Galileo.
+
 config X86_INTEL_LPSS
 	bool "Intel Low Power Subsystem Support"
 	depends on ACPI
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 61bd2ad94281..20028da8ae18 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -313,6 +313,19 @@ config DEBUG_NMI_SELFTEST
 
 	  If unsure, say N.
 
+config DEBUG_IMR_SELFTEST
+	bool "Isolated Memory Region self test"
+	default n
+	depends on INTEL_IMR
+	---help---
+	  This option enables automated sanity testing of the IMR code.
+	  Some simple tests are run to verify IMR bounds checking, alignment
+	  and overlapping. This option is really only useful if you are
+	  debugging an IMR memory map or are modifying the IMR code and want to
+	  test your changes.
+
+	  If unsure say N here.
+
 config X86_DEBUG_STATIC_CPU_HAS
 	bool "Debug alternatives"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 843feb3eb20b..0a291cdfaf77 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -51,6 +51,7 @@ $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
 
 vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
 	$(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
 
 $(obj)/vmlinux: $(vmlinux-objs-y) FORCE
 	$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index bb1376381985..7083c16cccba 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -14,6 +14,13 @@
 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
 
+struct kaslr_setup_data {
+	__u64 next;
+	__u32 type;
+	__u32 len;
+	__u8 data[1];
+} kaslr_setup_data;
+
 #define I8254_PORT_CONTROL	0x43
 #define I8254_PORT_COUNTER0	0x40
 #define I8254_CMD_READBACK	0xC0
@@ -295,7 +302,29 @@ static unsigned long find_random_addr(unsigned long minimum,
 	return slots_fetch_random();
 }
 
-unsigned char *choose_kernel_location(unsigned char *input,
+static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled)
+{
+	struct setup_data *data;
+
+	kaslr_setup_data.type = SETUP_KASLR;
+	kaslr_setup_data.len = 1;
+	kaslr_setup_data.next = 0;
+	kaslr_setup_data.data[0] = enabled;
+
+	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+	while (data && data->next)
+		data = (struct setup_data *)(unsigned long)data->next;
+
+	if (data)
+		data->next = (unsigned long)&kaslr_setup_data;
+	else
+		params->hdr.setup_data = (unsigned long)&kaslr_setup_data;
+
+}
+
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
@@ -306,14 +335,17 @@ unsigned char *choose_kernel_location(unsigned char *input,
 #ifdef CONFIG_HIBERNATION
 	if (!cmdline_find_option_bool("kaslr")) {
 		debug_putstr("KASLR disabled by default...\n");
+		add_kaslr_setup_data(params, 0);
 		goto out;
 	}
 #else
 	if (cmdline_find_option_bool("nokaslr")) {
 		debug_putstr("KASLR disabled by cmdline...\n");
+		add_kaslr_setup_data(params, 0);
 		goto out;
 	}
 #endif
+	add_kaslr_setup_data(params, 1);
 
 	/* Record the various known unsafe memory ranges. */
 	mem_avoid_init((unsigned long)input, input_size,
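
Note: add_kaslr_setup_data() above appends a node to the boot_params setup_data singly linked list, which chains physical addresses through each node's next field. A sketch of how a consumer walks that chain (the kernel proper does this in parse_setup_data(); here the list is assumed identity-mapped, as it is for the boot stub):

	struct setup_data {
		unsigned long long next;	/* phys addr of next node, 0 ends */
		unsigned int type;		/* e.g. SETUP_KASLR */
		unsigned int len;
		unsigned char data[];
	};

	static void walk_setup_data(unsigned long long pa_data)
	{
		while (pa_data) {
			struct setup_data *sd =
				(struct setup_data *)(unsigned long)pa_data;

			/* inspect sd->type / sd->len / sd->data here */
			pa_data = sd->next;
		}
	}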
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
index 7ff3632806b1..99494dff2113 100644
--- a/arch/x86/boot/compressed/efi_stub_64.S
+++ b/arch/x86/boot/compressed/efi_stub_64.S
@@ -3,28 +3,3 @@
 #include <asm/processor-flags.h>
 
 #include "../../platform/efi/efi_stub_64.S"
-
-#ifdef CONFIG_EFI_MIXED
-	.code64
-	.text
-ENTRY(efi64_thunk)
-	push	%rbp
-	push	%rbx
-
-	subq	$16, %rsp
-	leaq	efi_exit32(%rip), %rax
-	movl	%eax, 8(%rsp)
-	leaq	efi_gdt64(%rip), %rax
-	movl	%eax, 4(%rsp)
-	movl	%eax, 2(%rax)		/* Fixup the gdt base address */
-	leaq	efi32_boot_gdt(%rip), %rax
-	movl	%eax, (%rsp)
-
-	call	__efi64_thunk
-
-	addq	$16, %rsp
-	pop	%rbx
-	pop	%rbp
-	ret
-ENDPROC(efi64_thunk)
-#endif /* CONFIG_EFI_MIXED */
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
new file mode 100644
index 000000000000..630384a4c14a
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
+ *
+ * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+ *
+ * Because this thunking occurs before ExitBootServices() we have to
+ * restore the firmware's 32-bit GDT before we make EFI serivce calls,
+ * since the firmware's 32-bit IDT is still currently installed and it
+ * needs to be able to service interrupts.
+ *
+ * On the plus side, we don't have to worry about mangling 64-bit
+ * addresses into 32-bits because we're executing with an identify
+ * mapped pagetable and haven't transitioned to 64-bit virtual addresses
+ * yet.
+ */
+
+#include <linux/linkage.h>
+#include <asm/msr.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+	.code64
+	.text
+ENTRY(efi64_thunk)
+	push	%rbp
+	push	%rbx
+
+	subq	$8, %rsp
+	leaq	efi_exit32(%rip), %rax
+	movl	%eax, 4(%rsp)
+	leaq	efi_gdt64(%rip), %rax
+	movl	%eax, (%rsp)
+	movl	%eax, 2(%rax)		/* Fixup the gdt base address */
+
+	movl	%ds, %eax
+	push	%rax
+	movl	%es, %eax
+	push	%rax
+	movl	%ss, %eax
+	push	%rax
+
+	/*
+	 * Convert x86-64 ABI params to i386 ABI
+	 */
+	subq	$32, %rsp
+	movl	%esi, 0x0(%rsp)
+	movl	%edx, 0x4(%rsp)
+	movl	%ecx, 0x8(%rsp)
+	movq	%r8, %rsi
+	movl	%esi, 0xc(%rsp)
+	movq	%r9, %rsi
+	movl	%esi, 0x10(%rsp)
+
+	sgdt	save_gdt(%rip)
+
+	leaq	1f(%rip), %rbx
+	movq	%rbx, func_rt_ptr(%rip)
+
+	/*
+	 * Switch to gdt with 32-bit segments. This is the firmware GDT
+	 * that was installed when the kernel started executing. This
+	 * pointer was saved at the EFI stub entry point in head_64.S.
+	 */
+	leaq	efi32_boot_gdt(%rip), %rax
+	lgdt	(%rax)
+
+	pushq	$__KERNEL_CS
+	leaq	efi_enter32(%rip), %rax
+	pushq	%rax
+	lretq
+
+1:	addq	$32, %rsp
+
+	lgdt	save_gdt(%rip)
+
+	pop	%rbx
+	movl	%ebx, %ss
+	pop	%rbx
+	movl	%ebx, %es
+	pop	%rbx
+	movl	%ebx, %ds
+
+	/*
+	 * Convert 32-bit status code into 64-bit.
+	 */
+	test	%rax, %rax
+	jz	1f
+	movl	%eax, %ecx
+	andl	$0x0fffffff, %ecx
+	andl	$0xf0000000, %eax
+	shl	$32, %rax
+	or	%rcx, %rax
+1:
+	addq	$8, %rsp
+	pop	%rbx
+	pop	%rbp
+	ret
+ENDPROC(efi64_thunk)
+
+ENTRY(efi_exit32)
+	movq	func_rt_ptr(%rip), %rax
+	push	%rax
+	mov	%rdi, %rax
+	ret
+ENDPROC(efi_exit32)
+
+	.code32
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+ENTRY(efi_enter32)
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %ss
+
+	/* Reload pgtables */
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+
+	/* Disable paging */
+	movl	%cr0, %eax
+	btrl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+
+	/* Disable long mode via EFER */
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	btrl	$_EFER_LME, %eax
+	wrmsr
+
+	call	*%edi
+
+	/* We must preserve return value */
+	movl	%eax, %edi
+
+	/*
+	 * Some firmware will return with interrupts enabled. Be sure to
+	 * disable them before we switch GDTs.
+	 */
+	cli
+
+	movl	56(%esp), %eax
+	movl	%eax, 2(%eax)
+	lgdtl	(%eax)
+
+	movl	%cr4, %eax
+	btsl	$(X86_CR4_PAE_BIT), %eax
+	movl	%eax, %cr4
+
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	btsl	$_EFER_LME, %eax
+	wrmsr
+
+	xorl	%eax, %eax
+	lldt	%ax
+
+	movl	60(%esp), %eax
+	pushl	$__KERNEL_CS
+	pushl	%eax
+
+	/* Enable paging */
+	movl	%cr0, %eax
+	btsl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+	lret
+ENDPROC(efi_enter32)
+
+	.data
+	.balign	8
+	.global	efi32_boot_gdt
+efi32_boot_gdt:	.word	0
+		.quad	0
+
+save_gdt:	.word	0
+		.quad	0
+func_rt_ptr:	.quad	0
+
+	.global efi_gdt64
+efi_gdt64:
+	.word	efi_gdt64_end - efi_gdt64
+	.long	0			/* Filled out by user */
+	.word	0
+	.quad	0x0000000000000000	/* NULL descriptor */
+	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
+	.quad	0x0080890000000000	/* TS descriptor */
+	.quad	0x0000000000000000	/* TS continued */
+efi_gdt64_end:
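
Note: the "convert 32-bit status code into 64-bit" block in efi64_thunk above moves the EFI status class nibble from bits 31:28 to bits 63:60 while keeping the 28-bit code in place, since EFI_STATUS carries its error flag in the most significant bit of the native word. The same fixup in C, as a sketch:

	/* C rendering of the andl/shl/or sequence in efi64_thunk (sketch). */
	unsigned long long efi32_status_to_64(unsigned int s32)
	{
		unsigned long long code = s32 & 0x0fffffffu;  /* low 28 bits */
		unsigned long long cls  =
			(unsigned long long)(s32 & 0xf0000000u) << 32;

		return s32 ? (cls | code) : 0;	/* 0 (EFI_SUCCESS) unchanged */
	}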
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index a950864a64da..5903089c818f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -401,7 +401,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 	 * the entire decompressed kernel plus relocation table, or the
 	 * entire decompressed kernel plus .bss and .brk sections.
 	 */
-	output = choose_kernel_location(input_data, input_len, output,
+	output = choose_kernel_location(real_mode, input_data, input_len,
+					output,
 					output_len > run_size ? output_len
 							      : run_size);
 
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 04477d68403f..ee3576b2666b 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -57,7 +57,8 @@ int cmdline_find_option_bool(const char *option);
 
 #if CONFIG_RANDOMIZE_BASE
 /* aslr.c */
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size);
@@ -65,7 +66,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
 bool has_cpuflag(int flag);
 #else
 static inline
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 92003f3c8a42..efc3b22d896e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -213,7 +213,15 @@ void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+	return -1;
+}
+#else
 extern int apic_force_enable(unsigned long addr);
+#endif
 
 extern int apic_bsp_setup(bool upmode);
 extern void apic_ap_setup(void);
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
new file mode 100644
index 000000000000..cd2ce4068441
--- /dev/null
+++ b/arch/x86/include/asm/imr.h
@@ -0,0 +1,60 @@
+/*
+ * imr.h: Isolated Memory Region API
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _IMR_H
+#define _IMR_H
+
+#include <linux/types.h>
+
+/*
+ * IMR agent access mask bits
+ * See section 12.7.4.7 from quark-x1000-datasheet.pdf for register
+ * definitions.
+ */
+#define IMR_ESRAM_FLUSH		BIT(31)
+#define IMR_CPU_SNOOP		BIT(30)		/* Applicable only to write */
+#define IMR_RMU			BIT(29)
+#define IMR_VC1_SAI_ID3		BIT(15)
+#define IMR_VC1_SAI_ID2		BIT(14)
+#define IMR_VC1_SAI_ID1		BIT(13)
+#define IMR_VC1_SAI_ID0		BIT(12)
+#define IMR_VC0_SAI_ID3		BIT(11)
+#define IMR_VC0_SAI_ID2		BIT(10)
+#define IMR_VC0_SAI_ID1		BIT(9)
+#define IMR_VC0_SAI_ID0		BIT(8)
+#define IMR_CPU_0		BIT(1)		/* SMM mode */
+#define IMR_CPU			BIT(0)		/* Non SMM mode */
+#define IMR_ACCESS_NONE		0
+
+/*
+ * Read/Write access-all bits here include some reserved bits
+ * These are the values firmware uses and are accepted by hardware.
+ * The kernel defines read/write access-all in the same way as firmware
+ * in order to have a consistent and crisp definition across firmware,
+ * bootloader and kernel.
+ */
+#define IMR_READ_ACCESS_ALL	0xBFFFFFFF
+#define IMR_WRITE_ACCESS_ALL	0xFFFFFFFF
+
+/* Number of IMRs provided by Quark X1000 SoC */
+#define QUARK_X1000_IMR_MAX	0x08
+#define QUARK_X1000_IMR_REGBASE	0x40
+
+/* IMR alignment bits - only bits 31:10 are checked for IMR validity */
+#define IMR_ALIGN		0x400
+#define IMR_MASK		(IMR_ALIGN - 1)
+
+int imr_add_range(phys_addr_t base, size_t size,
+		  unsigned int rmask, unsigned int wmask, bool lock);
+
+int imr_remove_range(phys_addr_t base, size_t size);
+
+#endif /* _IMR_H */
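
Note: per the comment in the new header above, the IMR registers honour only address bits 31:10, so ranges handed to imr_add_range()/imr_remove_range() must be 1 KiB (IMR_ALIGN) granular. A sketch of the kind of validation this implies (imr_check_align() is a made-up helper, not part of the API):

	#include <linux/errno.h>
	#include <linux/types.h>

	static int imr_check_align(phys_addr_t base, size_t size)
	{
		if ((base & IMR_MASK) || (size & IMR_MASK))
			return -EINVAL;	/* not IMR_ALIGN (1 KiB) granular */
		return 0;
	}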
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index f97fbe3abb67..95e11f79f123 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,6 +51,8 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
+extern bool kaslr_enabled;
+
 static inline phys_addr_t get_max_mapped(void)
 {
 	return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 7050d864f520..cf87de3fc390 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key);
 
 static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
 {
-	set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
+	set_bit(0, (volatile unsigned long *)&lock->tickets.head);
 }
 
 #else /* !CONFIG_PARAVIRT_SPINLOCKS */
@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
 }
 
 #endif /* CONFIG_PARAVIRT_SPINLOCKS */
+static inline int __tickets_equal(__ticket_t one, __ticket_t two)
+{
+	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
+}
+
+static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
+						     __ticket_t head)
+{
+	if (head & TICKET_SLOWPATH_FLAG) {
+		arch_spinlock_t old, new;
+
+		old.tickets.head = head;
+		new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
+		old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
+		new.tickets.tail = old.tickets.tail;
+
+		/* try to clear slowpath flag when there are no contenders */
+		cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
+	}
+}
 
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
-	return lock.tickets.head == lock.tickets.tail;
+	return __tickets_equal(lock.tickets.head, lock.tickets.tail);
 }
 
 /*
@@ -87,18 +107,21 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 	if (likely(inc.head == inc.tail))
 		goto out;
 
-	inc.tail &= ~TICKET_SLOWPATH_FLAG;
 	for (;;) {
 		unsigned count = SPIN_THRESHOLD;
 
 		do {
-			if (READ_ONCE(lock->tickets.head) == inc.tail)
-				goto out;
+			inc.head = READ_ONCE(lock->tickets.head);
+			if (__tickets_equal(inc.head, inc.tail))
+				goto clear_slowpath;
 			cpu_relax();
 		} while (--count);
 		__ticket_lock_spinning(lock, inc.tail);
 	}
-out:	barrier();	/* make sure nothing creeps before the lock is taken */
+clear_slowpath:
+	__ticket_check_and_clear_slowpath(lock, inc.head);
+out:
+	barrier();	/* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
| @@ -106,56 +129,30 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) | |||
| 106 | arch_spinlock_t old, new; | 129 | arch_spinlock_t old, new; |
| 107 | 130 | ||
| 108 | old.tickets = READ_ONCE(lock->tickets); | 131 | old.tickets = READ_ONCE(lock->tickets); |
| 109 | if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) | 132 | if (!__tickets_equal(old.tickets.head, old.tickets.tail)) |
| 110 | return 0; | 133 | return 0; |
| 111 | 134 | ||
| 112 | new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT); | 135 | new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT); |
| 136 | new.head_tail &= ~TICKET_SLOWPATH_FLAG; | ||
| 113 | 137 | ||
| 114 | /* cmpxchg is a full barrier, so nothing can move before it */ | 138 | /* cmpxchg is a full barrier, so nothing can move before it */ |
| 115 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; | 139 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; |
| 116 | } | 140 | } |
| 117 | 141 | ||
| 118 | static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock, | ||
| 119 | arch_spinlock_t old) | ||
| 120 | { | ||
| 121 | arch_spinlock_t new; | ||
| 122 | |||
| 123 | BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS); | ||
| 124 | |||
| 125 | /* Perform the unlock on the "before" copy */ | ||
| 126 | old.tickets.head += TICKET_LOCK_INC; | ||
| 127 | |||
| 128 | /* Clear the slowpath flag */ | ||
| 129 | new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT); | ||
| 130 | |||
| 131 | /* | ||
| 132 | * If the lock is uncontended, clear the flag - use cmpxchg in | ||
| 133 | * case it changes behind our back though. | ||
| 134 | */ | ||
| 135 | if (new.tickets.head != new.tickets.tail || | ||
| 136 | cmpxchg(&lock->head_tail, old.head_tail, | ||
| 137 | new.head_tail) != old.head_tail) { | ||
| 138 | /* | ||
| 139 | * Lock still has someone queued for it, so wake up an | ||
| 140 | * appropriate waiter. | ||
| 141 | */ | ||
| 142 | __ticket_unlock_kick(lock, old.tickets.head); | ||
| 143 | } | ||
| 144 | } | ||
| 145 | |||
| 146 | static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) | 142 | static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) |
| 147 | { | 143 | { |
| 148 | if (TICKET_SLOWPATH_FLAG && | 144 | if (TICKET_SLOWPATH_FLAG && |
| 149 | static_key_false(¶virt_ticketlocks_enabled)) { | 145 | static_key_false(¶virt_ticketlocks_enabled)) { |
| 150 | arch_spinlock_t prev; | 146 | __ticket_t head; |
| 151 | 147 | ||
| 152 | prev = *lock; | 148 | BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS); |
| 153 | add_smp(&lock->tickets.head, TICKET_LOCK_INC); | ||
| 154 | 149 | ||
| 155 | /* add_smp() is a full mb() */ | 150 | head = xadd(&lock->tickets.head, TICKET_LOCK_INC); |
| 156 | 151 | ||
| 157 | if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG)) | 152 | if (unlikely(head & TICKET_SLOWPATH_FLAG)) { |
| 158 | __ticket_unlock_slowpath(lock, prev); | 153 | head &= ~TICKET_SLOWPATH_FLAG; |
| 154 | __ticket_unlock_kick(lock, (head + TICKET_LOCK_INC)); | ||
| 155 | } | ||
| 159 | } else | 156 | } else |
| 160 | __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX); | 157 | __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX); |
| 161 | } | 158 | } |
| @@ -164,14 +161,15 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock) | |||
| 164 | { | 161 | { |
| 165 | struct __raw_tickets tmp = READ_ONCE(lock->tickets); | 162 | struct __raw_tickets tmp = READ_ONCE(lock->tickets); |
| 166 | 163 | ||
| 167 | return tmp.tail != tmp.head; | 164 | return !__tickets_equal(tmp.tail, tmp.head); |
| 168 | } | 165 | } |
| 169 | 166 | ||
| 170 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) | 167 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) |
| 171 | { | 168 | { |
| 172 | struct __raw_tickets tmp = READ_ONCE(lock->tickets); | 169 | struct __raw_tickets tmp = READ_ONCE(lock->tickets); |
| 173 | 170 | ||
| 174 | return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; | 171 | tmp.head &= ~TICKET_SLOWPATH_FLAG; |
| 172 | return (tmp.tail - tmp.head) > TICKET_LOCK_INC; | ||
| 175 | } | 173 | } |
| 176 | #define arch_spin_is_contended arch_spin_is_contended | 174 | #define arch_spin_is_contended arch_spin_is_contended |
| 177 | 175 | ||
| @@ -191,8 +189,8 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) | |||
| 191 | * We need to check "unlocked" in a loop, tmp.head == head | 189 | * We need to check "unlocked" in a loop, tmp.head == head |
| 192 | * can be false positive because of overflow. | 190 | * can be false positive because of overflow. |
| 193 | */ | 191 | */ |
| 194 | if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) || | 192 | if (__tickets_equal(tmp.head, tmp.tail) || |
| 195 | tmp.head != head) | 193 | !__tickets_equal(tmp.head, head)) |
| 196 | break; | 194 | break; |
| 197 | 195 | ||
| 198 | cpu_relax(); | 196 | cpu_relax(); |
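Most of the spinlock rewrite above funnels through __tickets_equal(), which compares two tickets while ignoring TICKET_SLOWPATH_FLAG, so a ticket with the flag set still compares equal to its plain value. A standalone sketch of the comparison, assuming the paravirt configuration where the flag is bit 0 and tickets advance by TICKET_LOCK_INC == 2:

    #include <assert.h>

    typedef unsigned short ticket_t;           /* stands in for __ticket_t */
    #define TICKET_SLOWPATH_FLAG ((ticket_t)1) /* bit 0 under paravirt     */
    #define TICKET_LOCK_INC      ((ticket_t)2) /* tickets advance by two   */

    /* XOR cancels identical bits; masking out the flag ignores it. */
    static int tickets_equal(ticket_t one, ticket_t two)
    {
        return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
    }

    int main(void)
    {
        assert(tickets_equal(0x8, 0x8));                        /* plain equality    */
        assert(tickets_equal(0x8, 0x8 | TICKET_SLOWPATH_FLAG)); /* flag is ignored   */
        assert(!tickets_equal(0x8, 0x8 + TICKET_LOCK_INC));     /* different tickets */
        return 0;
    }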
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 225b0988043a..44e6dd7e36a2 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #define SETUP_DTB 2 | 7 | #define SETUP_DTB 2 |
| 8 | #define SETUP_PCI 3 | 8 | #define SETUP_PCI 3 |
| 9 | #define SETUP_EFI 4 | 9 | #define SETUP_EFI 4 |
| 10 | #define SETUP_KASLR 5 | ||
| 10 | 11 | ||
| 11 | /* ram_size flags */ | 12 | /* ram_size flags */ |
| 12 | #define RAMDISK_IMAGE_START_MASK 0x07FF | 13 | #define RAMDISK_IMAGE_START_MASK 0x07FF |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ae97ed0873c6..3d525c6124f6 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
| @@ -613,6 +613,11 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) | |||
| 613 | { | 613 | { |
| 614 | int rc, irq, trigger, polarity; | 614 | int rc, irq, trigger, polarity; |
| 615 | 615 | ||
| 616 | if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { | ||
| 617 | *irqp = gsi; | ||
| 618 | return 0; | ||
| 619 | } | ||
| 620 | |||
| 616 | rc = acpi_get_override_irq(gsi, &trigger, &polarity); | 621 | rc = acpi_get_override_irq(gsi, &trigger, &polarity); |
| 617 | if (rc == 0) { | 622 | if (rc == 0) { |
| 618 | trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; | 623 | trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; |
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index c6826d1e8082..746e7fd08aad 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c | |||
| @@ -196,6 +196,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
| 196 | struct microcode_header_intel mc_header; | 196 | struct microcode_header_intel mc_header; |
| 197 | unsigned int mc_size; | 197 | unsigned int mc_size; |
| 198 | 198 | ||
| 199 | if (leftover < sizeof(mc_header)) { | ||
| 200 | pr_err("error! Truncated header in microcode data file\n"); | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | |||
| 199 | if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) | 204 | if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) |
| 200 | break; | 205 | break; |
| 201 | 206 | ||
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index ec9df6f9cd47..420eb933189c 100644 --- a/arch/x86/kernel/cpu/microcode/intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c | |||
| @@ -321,7 +321,11 @@ get_matching_model_microcode(int cpu, unsigned long start, | |||
| 321 | unsigned int mc_saved_count = mc_saved_data->mc_saved_count; | 321 | unsigned int mc_saved_count = mc_saved_data->mc_saved_count; |
| 322 | int i; | 322 | int i; |
| 323 | 323 | ||
| 324 | while (leftover) { | 324 | while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) { |
| 325 | |||
| 326 | if (leftover < sizeof(mc_header)) | ||
| 327 | break; | ||
| 328 | |||
| 325 | mc_header = (struct microcode_header_intel *)ucode_ptr; | 329 | mc_header = (struct microcode_header_intel *)ucode_ptr; |
| 326 | 330 | ||
| 327 | mc_size = get_totalsize(mc_header); | 331 | mc_size = get_totalsize(mc_header); |
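Both microcode loaders gain the same defensive pattern: never read a microcode_header_intel unless at least sizeof(header) bytes of the blob remain, and stop if a header claims more data than is left. A simplified, self-contained sketch of that bounds-checked walk; the struct below is a trimmed stand-in for the real microcode_header_intel, and the size sanity checks are an assumption of this sketch rather than a copy of the kernel logic:

    #include <stddef.h>
    #include <string.h>

    /* Trimmed stand-in for struct microcode_header_intel. */
    struct mc_header {
        unsigned int hdrver, rev, date, sig;
        unsigned int cksum, ldrver, pf;
        unsigned int datasize, totalsize;
    };

    /* Walk a microcode blob without ever reading past its end. */
    static int parse_blob(const unsigned char *p, size_t leftover)
    {
        struct mc_header hdr;

        while (leftover) {
            if (leftover < sizeof(hdr))
                return -1;                    /* truncated header */
            memcpy(&hdr, p, sizeof(hdr));
            if (hdr.totalsize < sizeof(hdr) || hdr.totalsize > leftover)
                return -1;                    /* bogus or truncated body */
            p += hdr.totalsize;
            leftover -= hdr.totalsize;
        }
        return 0;                             /* clean end of blob */
    }

    int main(void)
    {
        unsigned char blob[4] = { 0 };        /* 4 bytes: too short */
        return parse_blob(blob, sizeof(blob)) == -1 ? 0 : 1;
    }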
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 705ef8d48e2d..67b1cbe0093a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
| @@ -302,6 +302,9 @@ int check_irq_vectors_for_cpu_disable(void) | |||
| 302 | irq = __this_cpu_read(vector_irq[vector]); | 302 | irq = __this_cpu_read(vector_irq[vector]); |
| 303 | if (irq >= 0) { | 303 | if (irq >= 0) { |
| 304 | desc = irq_to_desc(irq); | 304 | desc = irq_to_desc(irq); |
| 305 | if (!desc) | ||
| 306 | continue; | ||
| 307 | |||
| 305 | data = irq_desc_get_irq_data(desc); | 308 | data = irq_desc_get_irq_data(desc); |
| 306 | cpumask_copy(&affinity_new, data->affinity); | 309 | cpumask_copy(&affinity_new, data->affinity); |
| 307 | cpu_clear(this_cpu, affinity_new); | 310 | cpu_clear(this_cpu, affinity_new); |
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 98f654d466e5..6a1146ea4d4d 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c | |||
| @@ -84,7 +84,7 @@ static volatile u32 twobyte_is_boostable[256 / 32] = { | |||
| 84 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 84 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
| 85 | /* ---------------------------------------------- */ | 85 | /* ---------------------------------------------- */ |
| 86 | W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ | 86 | W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ |
| 87 | W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */ | 87 | W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) , /* 10 */ |
| 88 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */ | 88 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */ |
| 89 | W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ | 89 | W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ |
| 90 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | 90 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 94f643484300..e354cc6446ab 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
| @@ -609,7 +609,7 @@ static inline void check_zero(void) | |||
| 609 | u8 ret; | 609 | u8 ret; |
| 610 | u8 old; | 610 | u8 old; |
| 611 | 611 | ||
| 612 | old = ACCESS_ONCE(zero_stats); | 612 | old = READ_ONCE(zero_stats); |
| 613 | if (unlikely(old)) { | 613 | if (unlikely(old)) { |
| 614 | ret = cmpxchg(&zero_stats, old, 0); | 614 | ret = cmpxchg(&zero_stats, old, 0); |
| 615 | /* This ensures only one fellow resets the stat */ | 615 | /* This ensures only one fellow resets the stat */ |
| @@ -727,6 +727,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | |||
| 727 | int cpu; | 727 | int cpu; |
| 728 | u64 start; | 728 | u64 start; |
| 729 | unsigned long flags; | 729 | unsigned long flags; |
| 730 | __ticket_t head; | ||
| 730 | 731 | ||
| 731 | if (in_nmi()) | 732 | if (in_nmi()) |
| 732 | return; | 733 | return; |
| @@ -768,11 +769,15 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | |||
| 768 | */ | 769 | */ |
| 769 | __ticket_enter_slowpath(lock); | 770 | __ticket_enter_slowpath(lock); |
| 770 | 771 | ||
| 772 | /* make sure enter_slowpath, which is atomic does not cross the read */ | ||
| 773 | smp_mb__after_atomic(); | ||
| 774 | |||
| 771 | /* | 775 | /* |
| 772 | * check again make sure it didn't become free while | 776 | * check again make sure it didn't become free while |
| 773 | * we weren't looking. | 777 | * we weren't looking. |
| 774 | */ | 778 | */ |
| 775 | if (ACCESS_ONCE(lock->tickets.head) == want) { | 779 | head = READ_ONCE(lock->tickets.head); |
| 780 | if (__tickets_equal(head, want)) { | ||
| 776 | add_stats(TAKEN_SLOW_PICKUP, 1); | 781 | add_stats(TAKEN_SLOW_PICKUP, 1); |
| 777 | goto out; | 782 | goto out; |
| 778 | } | 783 | } |
| @@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) | |||
| 803 | add_stats(RELEASED_SLOW, 1); | 808 | add_stats(RELEASED_SLOW, 1); |
| 804 | for_each_cpu(cpu, &waiting_cpus) { | 809 | for_each_cpu(cpu, &waiting_cpus) { |
| 805 | const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); | 810 | const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); |
| 806 | if (ACCESS_ONCE(w->lock) == lock && | 811 | if (READ_ONCE(w->lock) == lock && |
| 807 | ACCESS_ONCE(w->want) == ticket) { | 812 | READ_ONCE(w->want) == ticket) { |
| 808 | add_stats(RELEASED_SLOW_KICKED, 1); | 813 | add_stats(RELEASED_SLOW_KICKED, 1); |
| 809 | kvm_kick_cpu(cpu); | 814 | kvm_kick_cpu(cpu); |
| 810 | break; | 815 | break; |
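The new smp_mb__after_atomic() in kvm_lock_spinning() closes a window where the atomic set of the slowpath flag could be ordered after the re-read of tickets.head, letting the vCPU miss a concurrent unlock and halt forever. A userspace analogue of the set-then-recheck pattern using C11 atomics; here a seq_cst read-modify-write supplies the full barrier that the kernel spells out explicitly, and the bit layout is illustrative only:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Toy lock word: ticket head in the upper bits, slowpath flag in bit 0. */
    static _Atomic unsigned int lock_word;

    /* Set the flag, then re-check whether the lock became free meanwhile. */
    static int enter_slowpath_then_recheck(unsigned int want)
    {
        atomic_fetch_or(&lock_word, 1u);   /* like __ticket_enter_slowpath() */
        /* The seq_cst RMW above orders the flag store before this load,
         * the role smp_mb__after_atomic() plays in the kernel code. */
        unsigned int head = atomic_load(&lock_word) & ~1u;
        return head == want;               /* 1: lock is free, don't halt */
    }

    int main(void)
    {
        printf("%d\n", enter_slowpath_then_recheck(0));
        return 0;
    }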
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index d1ac80b72c72..9bbb9b35c144 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
| @@ -47,21 +47,13 @@ do { \ | |||
| 47 | 47 | ||
| 48 | #ifdef CONFIG_RANDOMIZE_BASE | 48 | #ifdef CONFIG_RANDOMIZE_BASE |
| 49 | static unsigned long module_load_offset; | 49 | static unsigned long module_load_offset; |
| 50 | static int randomize_modules = 1; | ||
| 51 | 50 | ||
| 52 | /* Mutex protects the module_load_offset. */ | 51 | /* Mutex protects the module_load_offset. */ |
| 53 | static DEFINE_MUTEX(module_kaslr_mutex); | 52 | static DEFINE_MUTEX(module_kaslr_mutex); |
| 54 | 53 | ||
| 55 | static int __init parse_nokaslr(char *p) | ||
| 56 | { | ||
| 57 | randomize_modules = 0; | ||
| 58 | return 0; | ||
| 59 | } | ||
| 60 | early_param("nokaslr", parse_nokaslr); | ||
| 61 | |||
| 62 | static unsigned long int get_module_load_offset(void) | 54 | static unsigned long int get_module_load_offset(void) |
| 63 | { | 55 | { |
| 64 | if (randomize_modules) { | 56 | if (kaslr_enabled) { |
| 65 | mutex_lock(&module_kaslr_mutex); | 57 | mutex_lock(&module_kaslr_mutex); |
| 66 | /* | 58 | /* |
| 67 | * Calculate the module_load_offset the first time this | 59 | * Calculate the module_load_offset the first time this |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0a2421cca01f..98dc9317286e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -122,6 +122,8 @@ | |||
| 122 | unsigned long max_low_pfn_mapped; | 122 | unsigned long max_low_pfn_mapped; |
| 123 | unsigned long max_pfn_mapped; | 123 | unsigned long max_pfn_mapped; |
| 124 | 124 | ||
| 125 | bool __read_mostly kaslr_enabled = false; | ||
| 126 | |||
| 125 | #ifdef CONFIG_DMI | 127 | #ifdef CONFIG_DMI |
| 126 | RESERVE_BRK(dmi_alloc, 65536); | 128 | RESERVE_BRK(dmi_alloc, 65536); |
| 127 | #endif | 129 | #endif |
| @@ -425,6 +427,11 @@ static void __init reserve_initrd(void) | |||
| 425 | } | 427 | } |
| 426 | #endif /* CONFIG_BLK_DEV_INITRD */ | 428 | #endif /* CONFIG_BLK_DEV_INITRD */ |
| 427 | 429 | ||
| 430 | static void __init parse_kaslr_setup(u64 pa_data, u32 data_len) | ||
| 431 | { | ||
| 432 | unsigned char *data = early_memremap(pa_data + sizeof(struct setup_data), 1); | ||
| 433 | |||
| 434 | kaslr_enabled = !!*data; | ||
| 435 | early_memunmap(data, 1); | ||
| 436 | } | ||
| 434 | |||
| 428 | static void __init parse_setup_data(void) | 435 | static void __init parse_setup_data(void) |
| 429 | { | 436 | { |
| 430 | struct setup_data *data; | 437 | struct setup_data *data; |
| @@ -450,6 +457,9 @@ static void __init parse_setup_data(void) | |||
| 450 | case SETUP_EFI: | 457 | case SETUP_EFI: |
| 451 | parse_efi_setup(pa_data, data_len); | 458 | parse_efi_setup(pa_data, data_len); |
| 452 | break; | 459 | break; |
| 460 | case SETUP_KASLR: | ||
| 461 | parse_kaslr_setup(pa_data, data_len); | ||
| 462 | break; | ||
| 453 | default: | 463 | default: |
| 454 | break; | 464 | break; |
| 455 | } | 465 | } |
| @@ -832,10 +842,14 @@ static void __init trim_low_memory_range(void) | |||
| 832 | static int | 842 | static int |
| 833 | dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) | 843 | dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) |
| 834 | { | 844 | { |
| 835 | pr_emerg("Kernel Offset: 0x%lx from 0x%lx " | 845 | if (kaslr_enabled) |
| 836 | "(relocation range: 0x%lx-0x%lx)\n", | 846 | pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n", |
| 837 | (unsigned long)&_text - __START_KERNEL, __START_KERNEL, | 847 | (unsigned long)&_text - __START_KERNEL, |
| 838 | __START_KERNEL_map, MODULES_VADDR-1); | 848 | __START_KERNEL, |
| 849 | __START_KERNEL_map, | ||
| 850 | MODULES_VADDR-1); | ||
| 851 | else | ||
| 852 | pr_emerg("Kernel Offset: disabled\n"); | ||
| 839 | 853 | ||
| 840 | return 0; | 854 | return 0; |
| 841 | } | 855 | } |
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 8b96a947021f..81f8adb0679e 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
| @@ -66,27 +66,54 @@ | |||
| 66 | * Good-instruction tables for 32-bit apps. This is non-const and volatile | 66 | * Good-instruction tables for 32-bit apps. This is non-const and volatile |
| 67 | * to keep gcc from statically optimizing it out, as variable_test_bit makes | 67 | * to keep gcc from statically optimizing it out, as variable_test_bit makes |
| 68 | * some versions of gcc think only *(unsigned long*) is used. | 68 | * some versions of gcc think only *(unsigned long*) is used. |
| 69 | * | ||
| 70 | * Opcodes we'll probably never support: | ||
| 71 | * 6c-6f - ins,outs. SEGVs if used in userspace | ||
| 72 | * e4-e7 - in,out imm. SEGVs if used in userspace | ||
| 73 | * ec-ef - in,out acc. SEGVs if used in userspace | ||
| 74 | * cc - int3. SIGTRAP if used in userspace | ||
| 75 | * ce - into. Not used in userspace - no kernel support to make it useful. SEGVs | ||
| 76 | * (why we support bound (62) then? it's similar, and similarly unused...) | ||
| 77 | * f1 - int1. SIGTRAP if used in userspace | ||
| 78 | * f4 - hlt. SEGVs if used in userspace | ||
| 79 | * fa - cli. SEGVs if used in userspace | ||
| 80 | * fb - sti. SEGVs if used in userspace | ||
| 81 | * | ||
| 82 | * Opcodes which need some work to be supported: | ||
| 83 | * 07,17,1f - pop es/ss/ds | ||
| 84 | * Normally not used in userspace, but would execute if used. | ||
| 85 | * Can cause GP or stack exception if it tries to load a wrong segment descriptor. | ||
| 86 | * We hesitate to run them under single step since the kernel's handling | ||
| 87 | * of userspace single-stepping (TF flag) is fragile. | ||
| 88 | * We can easily refuse to support push es/cs/ss/ds (06/0e/16/1e) | ||
| 89 | * on the same grounds that they are never used. | ||
| 90 | * cd - int N. | ||
| 91 | * Used by userspace for "int 80" syscall entry. (Other "int N" | ||
| 92 | * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3). | ||
| 93 | * Not supported since the kernel's handling of userspace single-stepping | ||
| 94 | * (TF flag) is fragile. | ||
| 95 | * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad | ||
| 69 | */ | 96 | */ |
| 70 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) | 97 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) |
| 71 | static volatile u32 good_insns_32[256 / 32] = { | 98 | static volatile u32 good_insns_32[256 / 32] = { |
| 72 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 99 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
| 73 | /* ---------------------------------------------- */ | 100 | /* ---------------------------------------------- */ |
| 74 | W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */ | 101 | W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 00 */ |
| 75 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */ | 102 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */ |
| 76 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */ | 103 | W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ |
| 77 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */ | 104 | W(0x30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */ |
| 78 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | 105 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ |
| 79 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | 106 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ |
| 80 | W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | 107 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ |
| 81 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | 108 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ |
| 82 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | 109 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ |
| 83 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | 110 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ |
| 84 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | 111 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ |
| 85 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | 112 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ |
| 86 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | 113 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ |
| 87 | W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | 114 | W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ |
| 88 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | 115 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ |
| 89 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | 116 | W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ |
| 90 | /* ---------------------------------------------- */ | 117 | /* ---------------------------------------------- */ |
| 91 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 118 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
| 92 | }; | 119 | }; |
| @@ -94,27 +121,61 @@ static volatile u32 good_insns_32[256 / 32] = { | |||
| 94 | #define good_insns_32 NULL | 121 | #define good_insns_32 NULL |
| 95 | #endif | 122 | #endif |
| 96 | 123 | ||
| 97 | /* Good-instruction tables for 64-bit apps */ | 124 | /* Good-instruction tables for 64-bit apps. |
| 125 | * | ||
| 126 | * Genuinely invalid opcodes: | ||
| 127 | * 06,07 - formerly push/pop es | ||
| 128 | * 0e - formerly push cs | ||
| 129 | * 16,17 - formerly push/pop ss | ||
| 130 | * 1e,1f - formerly push/pop ds | ||
| 131 | * 27,2f,37,3f - formerly daa/das/aaa/aas | ||
| 132 | * 60,61 - formerly pusha/popa | ||
| 133 | * 62 - formerly bound. EVEX prefix for AVX512 (not yet supported) | ||
| 134 | * 82 - formerly redundant encoding of Group1 | ||
| 135 | * 9a - formerly call seg:ofs | ||
| 136 | * ce - formerly into | ||
| 137 | * d4,d5 - formerly aam/aad | ||
| 138 | * d6 - formerly undocumented salc | ||
| 139 | * ea - formerly jmp seg:ofs | ||
| 140 | * | ||
| 141 | * Opcodes we'll probably never support: | ||
| 142 | * 6c-6f - ins,outs. SEGVs if used in userspace | ||
| 143 | * e4-e7 - in,out imm. SEGVs if used in userspace | ||
| 144 | * ec-ef - in,out acc. SEGVs if used in userspace | ||
| 145 | * cc - int3. SIGTRAP if used in userspace | ||
| 146 | * f1 - int1. SIGTRAP if used in userspace | ||
| 147 | * f4 - hlt. SEGVs if used in userspace | ||
| 148 | * fa - cli. SEGVs if used in userspace | ||
| 149 | * fb - sti. SEGVs if used in userspace | ||
| 150 | * | ||
| 151 | * Opcodes which need some work to be supported: | ||
| 152 | * cd - int N. | ||
| 153 | * Used by userspace for "int 80" syscall entry. (Other "int N" | ||
| 154 | * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3). | ||
| 155 | * Not supported since the kernel's handling of userspace single-stepping | ||
| 156 | * (TF flag) is fragile. | ||
| 157 | * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad | ||
| 158 | */ | ||
| 98 | #if defined(CONFIG_X86_64) | 159 | #if defined(CONFIG_X86_64) |
| 99 | static volatile u32 good_insns_64[256 / 32] = { | 160 | static volatile u32 good_insns_64[256 / 32] = { |
| 100 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 161 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
| 101 | /* ---------------------------------------------- */ | 162 | /* ---------------------------------------------- */ |
| 102 | W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ | 163 | W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* 00 */ |
| 103 | W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ | 164 | W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ |
| 104 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ | 165 | W(0x20, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 20 */ |
| 105 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ | 166 | W(0x30, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 30 */ |
| 106 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | 167 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ |
| 107 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | 168 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ |
| 108 | W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | 169 | W(0x60, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ |
| 109 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | 170 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ |
| 110 | W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | 171 | W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ |
| 111 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | 172 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1) , /* 90 */ |
| 112 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | 173 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ |
| 113 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | 174 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ |
| 114 | W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | 175 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ |
| 115 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | 176 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ |
| 116 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | 177 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0) | /* e0 */ |
| 117 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | 178 | W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ |
| 118 | /* ---------------------------------------------- */ | 179 | /* ---------------------------------------------- */ |
| 119 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 180 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
| 120 | }; | 181 | }; |
| @@ -122,49 +183,55 @@ static volatile u32 good_insns_64[256 / 32] = { | |||
| 122 | #define good_insns_64 NULL | 183 | #define good_insns_64 NULL |
| 123 | #endif | 184 | #endif |
| 124 | 185 | ||
| 125 | /* Using this for both 64-bit and 32-bit apps */ | 186 | /* Using this for both 64-bit and 32-bit apps. |
| 187 | * Opcodes we don't support: | ||
| 188 | * 0f 00 - SLDT/STR/LLDT/LTR/VERR/VERW/-/- group. System insns | ||
| 189 | * 0f 01 - SGDT/SIDT/LGDT/LIDT/SMSW/-/LMSW/INVLPG group. | ||
| 190 | * Also encodes tons of other system insns if mod=11. | ||
| 191 | * Some are in fact non-system: xend, xtest, rdtscp, maybe more | ||
| 192 | * 0f 05 - syscall | ||
| 193 | * 0f 06 - clts (CPL0 insn) | ||
| 194 | * 0f 07 - sysret | ||
| 195 | * 0f 08 - invd (CPL0 insn) | ||
| 196 | * 0f 09 - wbinvd (CPL0 insn) | ||
| 197 | * 0f 0b - ud2 | ||
| 198 | * 0f 30 - wrmsr (CPL0 insn) (then why is rdmsr allowed? it's also a CPL0 insn) | ||
| 199 | * 0f 34 - sysenter | ||
| 200 | * 0f 35 - sysexit | ||
| 201 | * 0f 37 - getsec | ||
| 202 | * 0f 78 - vmread (Intel VMX. CPL0 insn) | ||
| 203 | * 0f 79 - vmwrite (Intel VMX. CPL0 insn) | ||
| 204 | * Note: with prefixes, these two opcodes are | ||
| 205 | * extrq/insertq/AVX512 convert vector ops. | ||
| 206 | * 0f ae - group15: [f]xsave,[f]xrstor,[v]{ld,st}mxcsr,clflush[opt], | ||
| 207 | * {rd,wr}{fs,gs}base,{s,l,m}fence. | ||
| 208 | * Why? They are all user-executable. | ||
| 209 | */ | ||
| 126 | static volatile u32 good_2byte_insns[256 / 32] = { | 210 | static volatile u32 good_2byte_insns[256 / 32] = { |
| 127 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 211 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
| 128 | /* ---------------------------------------------- */ | 212 | /* ---------------------------------------------- */ |
| 129 | W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ | 213 | W(0x00, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1) | /* 00 */ |
| 130 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ | 214 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ |
| 131 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ | 215 | W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ |
| 132 | W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ | 216 | W(0x30, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */ |
| 133 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | 217 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ |
| 134 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | 218 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ |
| 135 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ | 219 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ |
| 136 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ | 220 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* 70 */ |
| 137 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | 221 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ |
| 138 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | 222 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ |
| 139 | W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ | 223 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ |
| 140 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | 224 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ |
| 141 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ | 225 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ |
| 142 | W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | 226 | W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ |
| 143 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ | 227 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ |
| 144 | W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ | 228 | W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) /* f0 */ |
| 145 | /* ---------------------------------------------- */ | 229 | /* ---------------------------------------------- */ |
| 146 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 230 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
| 147 | }; | 231 | }; |
| 148 | #undef W | 232 | #undef W |
| 149 | 233 | ||
| 150 | /* | 234 | /* |
| 151 | * opcodes we'll probably never support: | ||
| 152 | * | ||
| 153 | * 6c-6d, e4-e5, ec-ed - in | ||
| 154 | * 6e-6f, e6-e7, ee-ef - out | ||
| 155 | * cc, cd - int3, int | ||
| 156 | * cf - iret | ||
| 157 | * d6 - illegal instruction | ||
| 158 | * f1 - int1/icebp | ||
| 159 | * f4 - hlt | ||
| 160 | * fa, fb - cli, sti | ||
| 161 | * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2 | ||
| 162 | * | ||
| 163 | * invalid opcodes in 64-bit mode: | ||
| 164 | * | ||
| 165 | * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5 | ||
| 166 | * 63 - we support this opcode in x86_64 but not in i386. | ||
| 167 | * | ||
| 168 | * opcodes we may need to refine support for: | 235 | * opcodes we may need to refine support for: |
| 169 | * | 236 | * |
| 170 | * 0f - 2-byte instructions: For many of these instructions, the validity | 237 | * 0f - 2-byte instructions: For many of these instructions, the validity |
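Each W() row in the tables above packs sixteen table cells into half of a u32, so deciding whether an opcode may be probed is a single bit test; the kernel does this with test_bit()/variable_test_bit, and the sketch below assumes the same little-endian bit numbering:

    #include <stdio.h>

    /* Look up an opcode in a 256-bit table stored as eight u32 words,
     * matching the bit numbering test_bit() uses on little-endian x86. */
    static int insn_is_good(const unsigned int table[8], unsigned char op)
    {
        return (table[op / 32] >> (op % 32)) & 1;
    }

    int main(void)
    {
        unsigned int table[8] = { 0 };

        table[0x50 / 32] |= 1u << (0x50 % 32);   /* mark 0x50 (push) good */

        printf("0x50 -> %d, 0x0f -> %d\n",
               insn_is_good(table, 0x50), insn_is_good(table, 0x0f));
        return 0;
    }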
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 553c094b9cd7..a110efca6d06 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
| @@ -238,6 +238,31 @@ static void __init_refok adjust_range_page_size_mask(struct map_range *mr, | |||
| 238 | } | 238 | } |
| 239 | } | 239 | } |
| 240 | 240 | ||
| 241 | static const char *page_size_string(struct map_range *mr) | ||
| 242 | { | ||
| 243 | static const char str_1g[] = "1G"; | ||
| 244 | static const char str_2m[] = "2M"; | ||
| 245 | static const char str_4m[] = "4M"; | ||
| 246 | static const char str_4k[] = "4k"; | ||
| 247 | |||
| 248 | if (mr->page_size_mask & (1<<PG_LEVEL_1G)) | ||
| 249 | return str_1g; | ||
| 250 | /* | ||
| 251 | * 32-bit without PAE has a 4M large page size. | ||
| 252 | * PG_LEVEL_2M is misnamed, but we can at least | ||
| 253 | * print out the right size in the string. | ||
| 254 | */ | ||
| 255 | if (IS_ENABLED(CONFIG_X86_32) && | ||
| 256 | !IS_ENABLED(CONFIG_X86_PAE) && | ||
| 257 | mr->page_size_mask & (1<<PG_LEVEL_2M)) | ||
| 258 | return str_4m; | ||
| 259 | |||
| 260 | if (mr->page_size_mask & (1<<PG_LEVEL_2M)) | ||
| 261 | return str_2m; | ||
| 262 | |||
| 263 | return str_4k; | ||
| 264 | } | ||
| 265 | |||
| 241 | static int __meminit split_mem_range(struct map_range *mr, int nr_range, | 266 | static int __meminit split_mem_range(struct map_range *mr, int nr_range, |
| 242 | unsigned long start, | 267 | unsigned long start, |
| 243 | unsigned long end) | 268 | unsigned long end) |
| @@ -333,8 +358,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, | |||
| 333 | for (i = 0; i < nr_range; i++) | 358 | for (i = 0; i < nr_range; i++) |
| 334 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", | 359 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", |
| 335 | mr[i].start, mr[i].end - 1, | 360 | mr[i].start, mr[i].end - 1, |
| 336 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | 361 | page_size_string(&mr[i])); |
| 337 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | ||
| 338 | 362 | ||
| 339 | return nr_range; | 363 | return nr_range; |
| 340 | } | 364 | } |
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 919b91205cd4..df4552bd239e 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
| @@ -35,12 +35,12 @@ struct va_alignment __read_mostly va_align = { | |||
| 35 | .flags = -1, | 35 | .flags = -1, |
| 36 | }; | 36 | }; |
| 37 | 37 | ||
| 38 | static unsigned int stack_maxrandom_size(void) | 38 | static unsigned long stack_maxrandom_size(void) |
| 39 | { | 39 | { |
| 40 | unsigned int max = 0; | 40 | unsigned long max = 0; |
| 41 | if ((current->flags & PF_RANDOMIZE) && | 41 | if ((current->flags & PF_RANDOMIZE) && |
| 42 | !(current->personality & ADDR_NO_RANDOMIZE)) { | 42 | !(current->personality & ADDR_NO_RANDOMIZE)) { |
| 43 | max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT; | 43 | max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT; |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | return max; | 46 | return max; |
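The unsigned int to unsigned long change matters because, assuming the 64-bit STACK_RND_MASK of 0x3fffff and PAGE_SHIFT == 12, the maximum stack randomization is 0x3fffff << 12 = 0x3fffff000 (roughly 16 GiB of range), which does not fit in 32 bits. A small demonstration of the truncation the patch fixes:

    #include <stdio.h>

    int main(void)
    {
        /* assumed values: 64-bit STACK_RND_MASK and PAGE_SHIFT == 12 */
        unsigned int  before = ((-1U)  & 0x3fffffU)  << 12; /* wraps at 32 bits */
        unsigned long after  = ((-1UL) & 0x3fffffUL) << 12; /* full result      */

        printf("unsigned int:  %#x\n",  before); /* 0xfffff000                */
        printf("unsigned long: %#lx\n", after);  /* 0x3fffff000, about 16 GiB */
        return 0;
    }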
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 85afde1fa3e5..a62e0be3a2f1 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile | |||
| @@ -5,6 +5,7 @@ obj-y += geode/ | |||
| 5 | obj-y += goldfish/ | 5 | obj-y += goldfish/ |
| 6 | obj-y += iris/ | 6 | obj-y += iris/ |
| 7 | obj-y += intel-mid/ | 7 | obj-y += intel-mid/ |
| 8 | obj-y += intel-quark/ | ||
| 8 | obj-y += olpc/ | 9 | obj-y += olpc/ |
| 9 | obj-y += scx200/ | 10 | obj-y += scx200/ |
| 10 | obj-y += sfi/ | 11 | obj-y += sfi/ |
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 5fcda7272550..86d0f9e08dd9 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S | |||
| @@ -91,167 +91,6 @@ ENTRY(efi_call) | |||
| 91 | ret | 91 | ret |
| 92 | ENDPROC(efi_call) | 92 | ENDPROC(efi_call) |
| 93 | 93 | ||
| 94 | #ifdef CONFIG_EFI_MIXED | ||
| 95 | |||
| 96 | /* | ||
| 97 | * We run this function from the 1:1 mapping. | ||
| 98 | * | ||
| 99 | * This function must be invoked with a 1:1 mapped stack. | ||
| 100 | */ | ||
| 101 | ENTRY(__efi64_thunk) | ||
| 102 | movl %ds, %eax | ||
| 103 | push %rax | ||
| 104 | movl %es, %eax | ||
| 105 | push %rax | ||
| 106 | movl %ss, %eax | ||
| 107 | push %rax | ||
| 108 | |||
| 109 | subq $32, %rsp | ||
| 110 | movl %esi, 0x0(%rsp) | ||
| 111 | movl %edx, 0x4(%rsp) | ||
| 112 | movl %ecx, 0x8(%rsp) | ||
| 113 | movq %r8, %rsi | ||
| 114 | movl %esi, 0xc(%rsp) | ||
| 115 | movq %r9, %rsi | ||
| 116 | movl %esi, 0x10(%rsp) | ||
| 117 | |||
| 118 | sgdt save_gdt(%rip) | ||
| 119 | |||
| 120 | leaq 1f(%rip), %rbx | ||
| 121 | movq %rbx, func_rt_ptr(%rip) | ||
| 122 | |||
| 123 | /* Switch to gdt with 32-bit segments */ | ||
| 124 | movl 64(%rsp), %eax | ||
| 125 | lgdt (%rax) | ||
| 126 | |||
| 127 | leaq efi_enter32(%rip), %rax | ||
| 128 | pushq $__KERNEL_CS | ||
| 129 | pushq %rax | ||
| 130 | lretq | ||
| 131 | |||
| 132 | 1: addq $32, %rsp | ||
| 133 | |||
| 134 | lgdt save_gdt(%rip) | ||
| 135 | |||
| 136 | pop %rbx | ||
| 137 | movl %ebx, %ss | ||
| 138 | pop %rbx | ||
| 139 | movl %ebx, %es | ||
| 140 | pop %rbx | ||
| 141 | movl %ebx, %ds | ||
| 142 | |||
| 143 | /* | ||
| 144 | * Convert 32-bit status code into 64-bit. | ||
| 145 | */ | ||
| 146 | test %rax, %rax | ||
| 147 | jz 1f | ||
| 148 | movl %eax, %ecx | ||
| 149 | andl $0x0fffffff, %ecx | ||
| 150 | andl $0xf0000000, %eax | ||
| 151 | shl $32, %rax | ||
| 152 | or %rcx, %rax | ||
| 153 | 1: | ||
| 154 | ret | ||
| 155 | ENDPROC(__efi64_thunk) | ||
| 156 | |||
| 157 | ENTRY(efi_exit32) | ||
| 158 | movq func_rt_ptr(%rip), %rax | ||
| 159 | push %rax | ||
| 160 | mov %rdi, %rax | ||
| 161 | ret | ||
| 162 | ENDPROC(efi_exit32) | ||
| 163 | |||
| 164 | .code32 | ||
| 165 | /* | ||
| 166 | * EFI service pointer must be in %edi. | ||
| 167 | * | ||
| 168 | * The stack should represent the 32-bit calling convention. | ||
| 169 | */ | ||
| 170 | ENTRY(efi_enter32) | ||
| 171 | movl $__KERNEL_DS, %eax | ||
| 172 | movl %eax, %ds | ||
| 173 | movl %eax, %es | ||
| 174 | movl %eax, %ss | ||
| 175 | |||
| 176 | /* Reload pgtables */ | ||
| 177 | movl %cr3, %eax | ||
| 178 | movl %eax, %cr3 | ||
| 179 | |||
| 180 | /* Disable paging */ | ||
| 181 | movl %cr0, %eax | ||
| 182 | btrl $X86_CR0_PG_BIT, %eax | ||
| 183 | movl %eax, %cr0 | ||
| 184 | |||
| 185 | /* Disable long mode via EFER */ | ||
| 186 | movl $MSR_EFER, %ecx | ||
| 187 | rdmsr | ||
| 188 | btrl $_EFER_LME, %eax | ||
| 189 | wrmsr | ||
| 190 | |||
| 191 | call *%edi | ||
| 192 | |||
| 193 | /* We must preserve return value */ | ||
| 194 | movl %eax, %edi | ||
| 195 | |||
| 196 | /* | ||
| 197 | * Some firmware will return with interrupts enabled. Be sure to | ||
| 198 | * disable them before we switch GDTs. | ||
| 199 | */ | ||
| 200 | cli | ||
| 201 | |||
| 202 | movl 68(%esp), %eax | ||
| 203 | movl %eax, 2(%eax) | ||
| 204 | lgdtl (%eax) | ||
| 205 | |||
| 206 | movl %cr4, %eax | ||
| 207 | btsl $(X86_CR4_PAE_BIT), %eax | ||
| 208 | movl %eax, %cr4 | ||
| 209 | |||
| 210 | movl %cr3, %eax | ||
| 211 | movl %eax, %cr3 | ||
| 212 | |||
| 213 | movl $MSR_EFER, %ecx | ||
| 214 | rdmsr | ||
| 215 | btsl $_EFER_LME, %eax | ||
| 216 | wrmsr | ||
| 217 | |||
| 218 | xorl %eax, %eax | ||
| 219 | lldt %ax | ||
| 220 | |||
| 221 | movl 72(%esp), %eax | ||
| 222 | pushl $__KERNEL_CS | ||
| 223 | pushl %eax | ||
| 224 | |||
| 225 | /* Enable paging */ | ||
| 226 | movl %cr0, %eax | ||
| 227 | btsl $X86_CR0_PG_BIT, %eax | ||
| 228 | movl %eax, %cr0 | ||
| 229 | lret | ||
| 230 | ENDPROC(efi_enter32) | ||
| 231 | |||
| 232 | .data | ||
| 233 | .balign 8 | ||
| 234 | .global efi32_boot_gdt | ||
| 235 | efi32_boot_gdt: .word 0 | ||
| 236 | .quad 0 | ||
| 237 | |||
| 238 | save_gdt: .word 0 | ||
| 239 | .quad 0 | ||
| 240 | func_rt_ptr: .quad 0 | ||
| 241 | |||
| 242 | .global efi_gdt64 | ||
| 243 | efi_gdt64: | ||
| 244 | .word efi_gdt64_end - efi_gdt64 | ||
| 245 | .long 0 /* Filled out by user */ | ||
| 246 | .word 0 | ||
| 247 | .quad 0x0000000000000000 /* NULL descriptor */ | ||
| 248 | .quad 0x00af9a000000ffff /* __KERNEL_CS */ | ||
| 249 | .quad 0x00cf92000000ffff /* __KERNEL_DS */ | ||
| 250 | .quad 0x0080890000000000 /* TS descriptor */ | ||
| 251 | .quad 0x0000000000000000 /* TS continued */ | ||
| 252 | efi_gdt64_end: | ||
| 253 | #endif /* CONFIG_EFI_MIXED */ | ||
| 254 | |||
| 255 | .data | 94 | .data |
| 256 | ENTRY(efi_scratch) | 95 | ENTRY(efi_scratch) |
| 257 | .fill 3,8,0 | 96 | .fill 3,8,0 |
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 8806fa73e6e6..ff85d28c50f2 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S | |||
| @@ -1,9 +1,26 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2014 Intel Corporation; author Matt Fleming | 2 | * Copyright (C) 2014 Intel Corporation; author Matt Fleming |
| 3 | * | ||
| 4 | * Support for invoking 32-bit EFI runtime services from a 64-bit | ||
| 5 | * kernel. | ||
| 6 | * | ||
| 7 | * The below thunking functions are only used after ExitBootServices() | ||
| 8 | * has been called. This simplifies things considerably as compared with | ||
| 9 | * the early EFI thunking because we can leave all the kernel state | ||
| 11 | * intact (GDT, IDT, etc) and simply invoke the 32-bit EFI runtime | ||
| 11 | * services from __KERNEL32_CS. This means we can continue to service | ||
| 12 | * interrupts across an EFI mixed mode call. | ||
| 13 | * | ||
| 14 | * We do, however, need to handle the fact that we're running in a full | ||
| 15 | * 64-bit virtual address space. Things like the stack and instruction | ||
| 16 | * addresses need to be accessible by the 32-bit firmware, so we rely on | ||
| 17 | * using the identity mappings in the EFI page table to access the stack | ||
| 18 | * and kernel text (see efi_setup_page_tables()). | ||
| 3 | */ | 19 | */ |
| 4 | 20 | ||
| 5 | #include <linux/linkage.h> | 21 | #include <linux/linkage.h> |
| 6 | #include <asm/page_types.h> | 22 | #include <asm/page_types.h> |
| 23 | #include <asm/segment.h> | ||
| 7 | 24 | ||
| 8 | .text | 25 | .text |
| 9 | .code64 | 26 | .code64 |
| @@ -33,14 +50,6 @@ ENTRY(efi64_thunk) | |||
| 33 | leaq efi_exit32(%rip), %rbx | 50 | leaq efi_exit32(%rip), %rbx |
| 34 | subq %rax, %rbx | 51 | subq %rax, %rbx |
| 35 | movl %ebx, 8(%rsp) | 52 | movl %ebx, 8(%rsp) |
| 36 | leaq efi_gdt64(%rip), %rbx | ||
| 37 | subq %rax, %rbx | ||
| 38 | movl %ebx, 2(%ebx) | ||
| 39 | movl %ebx, 4(%rsp) | ||
| 40 | leaq efi_gdt32(%rip), %rbx | ||
| 41 | subq %rax, %rbx | ||
| 42 | movl %ebx, 2(%ebx) | ||
| 43 | movl %ebx, (%rsp) | ||
| 44 | 53 | ||
| 45 | leaq __efi64_thunk(%rip), %rbx | 54 | leaq __efi64_thunk(%rip), %rbx |
| 46 | subq %rax, %rbx | 55 | subq %rax, %rbx |
| @@ -52,14 +61,92 @@ ENTRY(efi64_thunk) | |||
| 52 | retq | 61 | retq |
| 53 | ENDPROC(efi64_thunk) | 62 | ENDPROC(efi64_thunk) |
| 54 | 63 | ||
| 55 | .data | 64 | /* |
| 56 | efi_gdt32: | 65 | * We run this function from the 1:1 mapping. |
| 57 | .word efi_gdt32_end - efi_gdt32 | 66 | * |
| 58 | .long 0 /* Filled out above */ | 67 | * This function must be invoked with a 1:1 mapped stack. |
| 59 | .word 0 | 68 | */ |
| 60 | .quad 0x0000000000000000 /* NULL descriptor */ | 69 | ENTRY(__efi64_thunk) |
| 61 | .quad 0x00cf9a000000ffff /* __KERNEL_CS */ | 70 | movl %ds, %eax |
| 62 | .quad 0x00cf93000000ffff /* __KERNEL_DS */ | 71 | push %rax |
| 63 | efi_gdt32_end: | 72 | movl %es, %eax |
| 73 | push %rax | ||
| 74 | movl %ss, %eax | ||
| 75 | push %rax | ||
| 76 | |||
| 77 | subq $32, %rsp | ||
| 78 | movl %esi, 0x0(%rsp) | ||
| 79 | movl %edx, 0x4(%rsp) | ||
| 80 | movl %ecx, 0x8(%rsp) | ||
| 81 | movq %r8, %rsi | ||
| 82 | movl %esi, 0xc(%rsp) | ||
| 83 | movq %r9, %rsi | ||
| 84 | movl %esi, 0x10(%rsp) | ||
| 85 | |||
| 86 | leaq 1f(%rip), %rbx | ||
| 87 | movq %rbx, func_rt_ptr(%rip) | ||
| 88 | |||
| 89 | /* Switch to 32-bit descriptor */ | ||
| 90 | pushq $__KERNEL32_CS | ||
| 91 | leaq efi_enter32(%rip), %rax | ||
| 92 | pushq %rax | ||
| 93 | lretq | ||
| 94 | |||
| 95 | 1: addq $32, %rsp | ||
| 96 | |||
| 97 | pop %rbx | ||
| 98 | movl %ebx, %ss | ||
| 99 | pop %rbx | ||
| 100 | movl %ebx, %es | ||
| 101 | pop %rbx | ||
| 102 | movl %ebx, %ds | ||
| 64 | 103 | ||
| 104 | /* | ||
| 105 | * Convert 32-bit status code into 64-bit. | ||
| 106 | */ | ||
| 107 | test %rax, %rax | ||
| 108 | jz 1f | ||
| 109 | movl %eax, %ecx | ||
| 110 | andl $0x0fffffff, %ecx | ||
| 111 | andl $0xf0000000, %eax | ||
| 112 | shl $32, %rax | ||
| 113 | or %rcx, %rax | ||
| 114 | 1: | ||
| 115 | ret | ||
| 116 | ENDPROC(__efi64_thunk) | ||
| 117 | |||
| 118 | ENTRY(efi_exit32) | ||
| 119 | movq func_rt_ptr(%rip), %rax | ||
| 120 | push %rax | ||
| 121 | mov %rdi, %rax | ||
| 122 | ret | ||
| 123 | ENDPROC(efi_exit32) | ||
| 124 | |||
| 125 | .code32 | ||
| 126 | /* | ||
| 127 | * EFI service pointer must be in %edi. | ||
| 128 | * | ||
| 129 | * The stack should represent the 32-bit calling convention. | ||
| 130 | */ | ||
| 131 | ENTRY(efi_enter32) | ||
| 132 | movl $__KERNEL_DS, %eax | ||
| 133 | movl %eax, %ds | ||
| 134 | movl %eax, %es | ||
| 135 | movl %eax, %ss | ||
| 136 | |||
| 137 | call *%edi | ||
| 138 | |||
| 139 | /* We must preserve return value */ | ||
| 140 | movl %eax, %edi | ||
| 141 | |||
| 142 | movl 72(%esp), %eax | ||
| 143 | pushl $__KERNEL_CS | ||
| 144 | pushl %eax | ||
| 145 | |||
| 146 | lret | ||
| 147 | ENDPROC(efi_enter32) | ||
| 148 | |||
| 149 | .data | ||
| 150 | .balign 8 | ||
| 151 | func_rt_ptr: .quad 0 | ||
| 65 | efi_saved_sp: .quad 0 | 152 | efi_saved_sp: .quad 0 |
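The status-widening sequence near the end of __efi64_thunk() keeps the low 28 bits of the 32-bit EFI status in place and moves the high nibble, which carries the error bit, to the top of the 64-bit word. The same conversion expressed in C, as a sketch of what the four asm instructions (andl/andl/shl/or) compute:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Widen a 32-bit EFI status: error bit moves from bit 31 to bit 63. */
    static uint64_t efi32_status_to_64(uint32_t status)
    {
        return ((uint64_t)(status & 0xf0000000u) << 32) | (status & 0x0fffffffu);
    }

    int main(void)
    {
        /* 32-bit EFI_INVALID_PARAMETER 0x80000002 -> 0x8000000000000002 */
        printf("%#" PRIx64 "\n", efi32_status_to_64(0x80000002u));
        return 0;
    }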
diff --git a/arch/x86/platform/intel-quark/Makefile b/arch/x86/platform/intel-quark/Makefile new file mode 100644 index 000000000000..9cc57ed36022 --- /dev/null +++ b/arch/x86/platform/intel-quark/Makefile | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | obj-$(CONFIG_INTEL_IMR) += imr.o | ||
| 2 | obj-$(CONFIG_DEBUG_IMR_SELFTEST) += imr_selftest.o | ||
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c new file mode 100644 index 000000000000..0ee619f9fcb7 --- /dev/null +++ b/arch/x86/platform/intel-quark/imr.c | |||
| @@ -0,0 +1,661 @@ | |||
| 1 | /** | ||
| 2 | * imr.c | ||
| 3 | * | ||
| 4 | * Copyright(c) 2013 Intel Corporation. | ||
| 5 | * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> | ||
| 6 | * | ||
| 7 | * IMR registers define an isolated region of memory that can | ||
| 8 | * be masked to prohibit certain system agents from accessing memory. | ||
| 9 | * When a device behind a masked port performs an access - snooped or | ||
| 10 | * not - an IMR may optionally prevent that transaction from changing | ||
| 11 | * the state of memory or from getting correct data in response to the | ||
| 12 | * operation. | ||
| 13 | * | ||
| 14 | * Write data will be dropped and reads will return 0xFFFFFFFF; the | ||
| 15 | * system will reset and the system BIOS will print an error message to | ||
| 16 | * inform the user that an IMR has been violated. | ||
| 17 | * | ||
| 18 | * This code is based on the Linux MTRR code and reference code from | ||
| 19 | * Intel's Quark BSP EFI, Linux and grub code. | ||
| 20 | * | ||
| 21 | * See quark-x1000-datasheet.pdf for register definitions. | ||
| 22 | * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/quark-x1000-datasheet.pdf | ||
| 23 | */ | ||
| 24 | |||
| 25 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 26 | |||
| 27 | #include <asm-generic/sections.h> | ||
| 28 | #include <asm/cpu_device_id.h> | ||
| 29 | #include <asm/imr.h> | ||
| 30 | #include <asm/iosf_mbi.h> | ||
| 31 | #include <linux/debugfs.h> | ||
| 32 | #include <linux/init.h> | ||
| 33 | #include <linux/mm.h> | ||
| 34 | #include <linux/module.h> | ||
| 35 | #include <linux/types.h> | ||
| 36 | |||
| 37 | struct imr_device { | ||
| 38 | struct dentry *file; | ||
| 39 | bool init; | ||
| 40 | struct mutex lock; | ||
| 41 | int max_imr; | ||
| 42 | int reg_base; | ||
| 43 | }; | ||
| 44 | |||
| 45 | static struct imr_device imr_dev; | ||
| 46 | |||
| 47 | /* | ||
| 48 | * IMR read/write mask control registers. | ||
| 49 | * See quark-x1000-datasheet.pdf sections 12.7.4.5 and 12.7.4.6 for | ||
| 50 | * bit definitions. | ||
| 51 | * | ||
| 52 | * addr_lo | ||
| 53 | * 31 Lock bit | ||
| 54 | * 30:24 Reserved | ||
| 55 | * 23:2 1 KiB aligned lo address | ||
| 56 | * 1:0 Reserved | ||
| 57 | * | ||
| 58 | * addr_hi | ||
| 59 | * 31:24 Reserved | ||
| 60 | * 23:2 1 KiB aligned hi address | ||
| 61 | * 1:0 Reserved | ||
| 62 | */ | ||
| 63 | #define IMR_LOCK BIT(31) | ||
| 64 | |||
| 65 | struct imr_regs { | ||
| 66 | u32 addr_lo; | ||
| 67 | u32 addr_hi; | ||
| 68 | u32 rmask; | ||
| 69 | u32 wmask; | ||
| 70 | }; | ||
| 71 | |||
| 72 | #define IMR_NUM_REGS (sizeof(struct imr_regs)/sizeof(u32)) | ||
| 73 | #define IMR_SHIFT 8 | ||
| 74 | #define imr_to_phys(x) ((x) << IMR_SHIFT) | ||
| 75 | #define phys_to_imr(x) ((x) >> IMR_SHIFT) | ||
| 76 | |||
| 77 | /** | ||
| 78 | * imr_is_enabled - true if an IMR is enabled false otherwise. | ||
| 79 | * | ||
| 80 | * Determines if an IMR is enabled based on address range and read/write | ||
| 81 | * mask. An IMR set with an address range set to zero and a read/write | ||
| 82 | * access mask set to all is considered to be disabled. An IMR in any | ||
| 83 | * other state - for example set to zero but without read/write access | ||
| 84 | * all is considered to be enabled. This definition of disabled is how | ||
| 85 | * firmware switches off an IMR and is maintained in kernel for | ||
| 86 | * consistency. | ||
| 87 | * | ||
| 88 | * @imr: pointer to IMR descriptor. | ||
| 89 | * @return: true if IMR enabled false if disabled. | ||
| 90 | */ | ||
| 91 | static inline int imr_is_enabled(struct imr_regs *imr) | ||
| 92 | { | ||
| 93 | return !(imr->rmask == IMR_READ_ACCESS_ALL && | ||
| 94 | imr->wmask == IMR_WRITE_ACCESS_ALL && | ||
| 95 | imr_to_phys(imr->addr_lo) == 0 && | ||
| 96 | imr_to_phys(imr->addr_hi) == 0); | ||
| 97 | } | ||
| 98 | |||
| 99 | /** | ||
| 100 | * imr_read - read an IMR at a given index. | ||
| 101 | * | ||
| 102 | * Requires caller to hold imr mutex. | ||
| 103 | * | ||
| 104 | * @idev: pointer to imr_device structure. | ||
| 105 | * @imr_id: IMR entry to read. | ||
| 106 | * @imr: IMR structure representing address and access masks. | ||
| 107 | * @return: 0 on success or error code passed from iosf_mbi on failure. | ||
| 108 | */ | ||
| 109 | static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr) | ||
| 110 | { | ||
| 111 | u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base; | ||
| 112 | int ret; | ||
| 113 | |||
| 114 | ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ, | ||
| 115 | reg++, &imr->addr_lo); | ||
| 116 | if (ret) | ||
| 117 | return ret; | ||
| 118 | |||
| 119 | ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ, | ||
| 120 | reg++, &imr->addr_hi); | ||
| 121 | if (ret) | ||
| 122 | return ret; | ||
| 123 | |||
| 124 | ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ, | ||
| 125 | reg++, &imr->rmask); | ||
| 126 | if (ret) | ||
| 127 | return ret; | ||
| 128 | |||
| 129 | return iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ, | ||
| 130 | reg++, &imr->wmask); | ||
| 131 | } | ||
| 132 | |||
| 133 | /** | ||
| 134 | * imr_write - write an IMR at a given index. | ||
| 135 | * | ||
| 136 | * Requires caller to hold imr mutex. | ||
| 137 | * Note lock bits need to be written independently of address bits. | ||
| 138 | * | ||
| 139 | * @idev: pointer to imr_device structure. | ||
| 140 | * @imr_id: IMR entry to write. | ||
| 141 | * @imr: IMR structure representing address and access masks. | ||
| 142 | * @lock: indicates if the IMR lock bit should be applied. | ||
| 143 | * @return: 0 on success or error code passed from iosf_mbi on failure. | ||
| 144 | */ | ||
| 145 | static int imr_write(struct imr_device *idev, u32 imr_id, | ||
| 146 | struct imr_regs *imr, bool lock) | ||
| 147 | { | ||
| 148 | unsigned long flags; | ||
| 149 | u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base; | ||
| 150 | int ret; | ||
| 151 | |||
| 152 | local_irq_save(flags); | ||
| 153 | |||
| 154 | ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, reg++, | ||
| 155 | imr->addr_lo); | ||
| 156 | if (ret) | ||
| 157 | goto failed; | ||
| 158 | |||
| 159 | ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, | ||
| 160 | reg++, imr->addr_hi); | ||
| 161 | if (ret) | ||
| 162 | goto failed; | ||
| 163 | |||
| 164 | ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, | ||
| 165 | reg++, imr->rmask); | ||
| 166 | if (ret) | ||
| 167 | goto failed; | ||
| 168 | |||
| 169 | ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, | ||
| 170 | reg++, imr->wmask); | ||
| 171 | if (ret) | ||
| 172 | goto failed; | ||
| 173 | |||
| 174 | /* Lock bit must be set separately from the addr_lo address bits. */ | ||
| 175 | if (lock) { | ||
| 176 | imr->addr_lo |= IMR_LOCK; | ||
| 177 | ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, | ||
| 178 | reg - IMR_NUM_REGS, imr->addr_lo); | ||
| 179 | if (ret) | ||
| 180 | goto failed; | ||
| 181 | } | ||
| 182 | |||
| 183 | local_irq_restore(flags); | ||
| 184 | return 0; | ||
| 185 | failed: | ||
| 186 | /* | ||
| 187 | * If writing to the IOSF failed then we're in an unknown state, | ||
| 188 | * likely a very bad state. An IMR in an invalid state will almost | ||
| 189 | * certainly lead to a memory access violation. | ||
| 190 | */ | ||
| 191 | local_irq_restore(flags); | ||
| 192 | WARN(ret, "IOSF-MBI write fail range 0x%08x-0x%08x unreliable\n", | ||
| 193 | imr_to_phys(imr->addr_lo), imr_to_phys(imr->addr_hi) + IMR_MASK); | ||
| 194 | |||
| 195 | return ret; | ||
| 196 | } | ||
| 197 | |||
| 198 | /** | ||
| 199 | * imr_dbgfs_state_show - print state of IMR registers. | ||
| 200 | * | ||
| 201 | * @s: pointer to seq_file for output. | ||
| 202 | * @unused: unused parameter. | ||
| 203 | * @return: 0 on success or error code passed from iosf_mbi on failure. | ||
| 204 | */ | ||
| 205 | static int imr_dbgfs_state_show(struct seq_file *s, void *unused) | ||
| 206 | { | ||
| 207 | phys_addr_t base; | ||
| 208 | phys_addr_t end; | ||
| 209 | int i; | ||
| 210 | struct imr_device *idev = s->private; | ||
| 211 | struct imr_regs imr; | ||
| 212 | size_t size; | ||
| 213 | int ret = -ENODEV; | ||
| 214 | |||
| 215 | mutex_lock(&idev->lock); | ||
| 216 | |||
| 217 | for (i = 0; i < idev->max_imr; i++) { | ||
| 218 | |||
| 219 | ret = imr_read(idev, i, &imr); | ||
| 220 | if (ret) | ||
| 221 | break; | ||
| 222 | |||
| 223 | /* | ||
| 224 | * Remember to add IMR_ALIGN bytes to size to indicate the | ||
| 225 | * inherent IMR_ALIGN size bytes contained in the masked away | ||
| 226 | * lower ten bits. | ||
| 227 | */ | ||
| 228 | if (imr_is_enabled(&imr)) { | ||
| 229 | base = imr_to_phys(imr.addr_lo); | ||
| 230 | end = imr_to_phys(imr.addr_hi) + IMR_MASK; | ||
| 231 | } else { | ||
| 232 | base = 0; | ||
| 233 | end = 0; | ||
| 234 | } | ||
| 235 | size = end - base; | ||
| 236 | seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx " | ||
| 237 | "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i, | ||
| 238 | &base, &end, size, imr.rmask, imr.wmask, | ||
| 239 | imr_is_enabled(&imr) ? "enabled " : "disabled", | ||
| 240 | imr.addr_lo & IMR_LOCK ? "locked" : "unlocked"); | ||
| 241 | } | ||
| 242 | |||
| 243 | mutex_unlock(&idev->lock); | ||
| 244 | return ret; | ||
| 245 | } | ||
| 246 | |||
| 247 | /** | ||
| 248 | * imr_state_open - debugfs open callback. | ||
| 249 | * | ||
| 250 | * @inode: pointer to struct inode. | ||
| 251 | * @file: pointer to struct file. | ||
| 252 | * @return: result of single open. | ||
| 253 | */ | ||
| 254 | static int imr_state_open(struct inode *inode, struct file *file) | ||
| 255 | { | ||
| 256 | return single_open(file, imr_dbgfs_state_show, inode->i_private); | ||
| 257 | } | ||
| 258 | |||
| 259 | static const struct file_operations imr_state_ops = { | ||
| 260 | .open = imr_state_open, | ||
| 261 | .read = seq_read, | ||
| 262 | .llseek = seq_lseek, | ||
| 263 | .release = single_release, | ||
| 264 | }; | ||
| 265 | |||
| 266 | /** | ||
| 267 | * imr_debugfs_register - register debugfs hooks. | ||
| 268 | * | ||
| 269 | * @idev: pointer to imr_device structure. | ||
| 270 | * @return: 0 on success - errno on failure. | ||
| 271 | */ | ||
| 272 | static int imr_debugfs_register(struct imr_device *idev) | ||
| 273 | { | ||
| 274 | idev->file = debugfs_create_file("imr_state", S_IFREG | S_IRUGO, NULL, | ||
| 275 | idev, &imr_state_ops); | ||
| 276 | return PTR_ERR_OR_ZERO(idev->file); | ||
| 277 | } | ||
| 278 | |||
| 279 | /** | ||
| 280 | * imr_debugfs_unregister - unregister debugfs hooks. | ||
| 281 | * | ||
| 282 | * @idev: pointer to imr_device structure. | ||
| 283 | * @return: | ||
| 284 | */ | ||
| 285 | static void imr_debugfs_unregister(struct imr_device *idev) | ||
| 286 | { | ||
| 287 | debugfs_remove(idev->file); | ||
| 288 | } | ||
| 289 | |||
| 290 | /** | ||
| 291 | * imr_check_params - check passed address range IMR alignment and non-zero size | ||
| 292 | * | ||
| 293 | * @base: base address of intended IMR. | ||
| 294 | * @size: size of intended IMR. | ||
| 295 | * @return: zero on valid range -EINVAL on unaligned base/size. | ||
| 296 | */ | ||
| 297 | static int imr_check_params(phys_addr_t base, size_t size) | ||
| 298 | { | ||
| 299 | if ((base & IMR_MASK) || (size & IMR_MASK)) { | ||
| 300 | pr_err("base %pa size 0x%08zx must align to 1KiB\n", | ||
| 301 | &base, size); | ||
| 302 | return -EINVAL; | ||
| 303 | } | ||
| 304 | if (size == 0) | ||
| 305 | return -EINVAL; | ||
| 306 | |||
| 307 | return 0; | ||
| 308 | } | ||
| 309 | |||
| 310 | /** | ||
| 311 | * imr_raw_size - account for the IMR_ALIGN bytes that addr_hi appends. | ||
| 312 | * | ||
| 313 | * IMR addr_hi has a built in offset of plus IMR_ALIGN (0x400) bytes from the | ||
| 314 | * value in the register. We need to subtract IMR_ALIGN bytes from input sizes | ||
| 315 | * as a result. | ||
| 316 | * | ||
| 317 | * @size: input size bytes. | ||
| 318 | * @return: reduced size. | ||
| 319 | */ | ||
| 320 | static inline size_t imr_raw_size(size_t size) | ||
| 321 | { | ||
| 322 | return size - IMR_ALIGN; | ||
| 323 | } | ||
| 324 | |||
| 325 | /** | ||
| 326 | * imr_address_overlap - detects an address overlap. | ||
| 327 | * | ||
| 328 | * @addr: address to check against an existing IMR. | ||
| 329 | * @imr: imr being checked. | ||
| 330 | * @return: true for overlap false for no overlap. | ||
| 331 | */ | ||
| 332 | static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr) | ||
| 333 | { | ||
| 334 | return addr >= imr_to_phys(imr->addr_lo) && addr <= imr_to_phys(imr->addr_hi); | ||
| 335 | } | ||
| 336 | |||
| 337 | /** | ||
| 338 | * imr_add_range - add an Isolated Memory Region. | ||
| 339 | * | ||
| 340 | * @base: physical base address of region aligned to 1KiB. | ||
| 341 | * @size: physical size of region in bytes must be aligned to 1KiB. | ||
| 342 | * @read_mask: read access mask. | ||
| 343 | * @write_mask: write access mask. | ||
| 344 | * @lock: indicates whether or not to permanently lock this region. | ||
| 345 | * @return: zero on success or negative value indicating error. | ||
| 346 | */ | ||
| 347 | int imr_add_range(phys_addr_t base, size_t size, | ||
| 348 | unsigned int rmask, unsigned int wmask, bool lock) | ||
| 349 | { | ||
| 350 | phys_addr_t end; | ||
| 351 | unsigned int i; | ||
| 352 | struct imr_device *idev = &imr_dev; | ||
| 353 | struct imr_regs imr; | ||
| 354 | size_t raw_size; | ||
| 355 | int reg; | ||
| 356 | int ret; | ||
| 357 | |||
| 358 | if (WARN_ONCE(idev->init == false, "driver not initialized")) | ||
| 359 | return -ENODEV; | ||
| 360 | |||
| 361 | ret = imr_check_params(base, size); | ||
| 362 | if (ret) | ||
| 363 | return ret; | ||
| 364 | |||
| 365 | /* Tweak the size value. */ | ||
| 366 | raw_size = imr_raw_size(size); | ||
| 367 | end = base + raw_size; | ||
| 368 | |||
| 369 | /* | ||
| 370 | * Check for reserved IMR value common to firmware, kernel and grub | ||
| 371 | * indicating a disabled IMR. | ||
| 372 | */ | ||
| 373 | imr.addr_lo = phys_to_imr(base); | ||
| 374 | imr.addr_hi = phys_to_imr(end); | ||
| 375 | imr.rmask = rmask; | ||
| 376 | imr.wmask = wmask; | ||
| 377 | if (!imr_is_enabled(&imr)) | ||
| 378 | return -ENOTSUPP; | ||
| 379 | |||
| 380 | mutex_lock(&idev->lock); | ||
| 381 | |||
| 382 | /* | ||
| 383 | * Find a free IMR while checking for an existing overlapping range. | ||
| 384 | * Note there's no restriction in silicon to prevent IMR overlaps. | ||
| 385 | * For the sake of simplicity and ease in defining/debugging an IMR | ||
| 386 | * memory map we exclude IMR overlaps. | ||
| 387 | */ | ||
| 388 | reg = -1; | ||
| 389 | for (i = 0; i < idev->max_imr; i++) { | ||
| 390 | ret = imr_read(idev, i, &imr); | ||
| 391 | if (ret) | ||
| 392 | goto failed; | ||
| 393 | |||
| 394 | /* Find overlap @ base or end of requested range. */ | ||
| 395 | ret = -EINVAL; | ||
| 396 | if (imr_is_enabled(&imr)) { | ||
| 397 | if (imr_address_overlap(base, &imr)) | ||
| 398 | goto failed; | ||
| 399 | if (imr_address_overlap(end, &imr)) | ||
| 400 | goto failed; | ||
| 401 | } else { | ||
| 402 | reg = i; | ||
| 403 | } | ||
| 404 | } | ||
| 405 | |||
| 406 | /* Error out if we have no free IMR entries. */ | ||
| 407 | if (reg == -1) { | ||
| 408 | ret = -ENOMEM; | ||
| 409 | goto failed; | ||
| 410 | } | ||
| 411 | |||
| 412 | pr_debug("add %d phys %pa-%pa size %zx mask 0x%08x wmask 0x%08x\n", | ||
| 413 | reg, &base, &end, raw_size, rmask, wmask); | ||
| 414 | |||
| 415 | /* Enable IMR at specified range and access mask. */ | ||
| 416 | imr.addr_lo = phys_to_imr(base); | ||
| 417 | imr.addr_hi = phys_to_imr(end); | ||
| 418 | imr.rmask = rmask; | ||
| 419 | imr.wmask = wmask; | ||
| 420 | |||
| 421 | ret = imr_write(idev, reg, &imr, lock); | ||
| 422 | if (ret < 0) { | ||
| 423 | /* | ||
| 424 | * In the highly unlikely event iosf_mbi_write failed | ||
| 425 | * attempt to rollback the IMR setup skipping the trapping | ||
| 426 | * of further IOSF write failures. | ||
| 427 | */ | ||
| 428 | imr.addr_lo = 0; | ||
| 429 | imr.addr_hi = 0; | ||
| 430 | imr.rmask = IMR_READ_ACCESS_ALL; | ||
| 431 | imr.wmask = IMR_WRITE_ACCESS_ALL; | ||
| 432 | imr_write(idev, reg, &imr, false); | ||
| 433 | } | ||
| 434 | failed: | ||
| 435 | mutex_unlock(&idev->lock); | ||
| 436 | return ret; | ||
| 437 | } | ||
| 438 | EXPORT_SYMBOL_GPL(imr_add_range); | ||
| 439 | |||
| 440 | /** | ||
| 441 | * __imr_remove_range - delete an Isolated Memory Region. | ||
| 442 | * | ||
| 443 | * This function allows you to delete an IMR by its index specified by reg or | ||
| 444 | * by address range specified by base and size respectively. If you specify an | ||
| 445 | * index on its own the base and size parameters are ignored. | ||
| 446 | * imr_remove_range(0, base, size); delete IMR at index 0 base/size ignored. | ||
| 447 | * imr_remove_range(-1, base, size); delete IMR from base to base+size. | ||
| 448 | * | ||
| 449 | * @reg: imr index to remove. | ||
| 450 | * @base: physical base address of region aligned to 1 KiB. | ||
| 451 | * @size: physical size of region in bytes aligned to 1 KiB. | ||
| 452 | * @return: -EINVAL on invalid range or out or range id | ||
| 453 | * -ENODEV if reg is valid but no IMR exists or is locked | ||
| 454 | * 0 on success. | ||
| 455 | */ | ||
| 456 | static int __imr_remove_range(int reg, phys_addr_t base, size_t size) | ||
| 457 | { | ||
| 458 | phys_addr_t end; | ||
| 459 | bool found = false; | ||
| 460 | unsigned int i; | ||
| 461 | struct imr_device *idev = &imr_dev; | ||
| 462 | struct imr_regs imr; | ||
| 463 | size_t raw_size; | ||
| 464 | int ret = 0; | ||
| 465 | |||
| 466 | if (WARN_ONCE(idev->init == false, "driver not initialized")) | ||
| 467 | return -ENODEV; | ||
| 468 | |||
| 469 | /* | ||
| 470 | * Validate address range if deleting by address, else we are | ||
| 471 | * deleting by index where base and size will be ignored. | ||
| 472 | */ | ||
| 473 | if (reg == -1) { | ||
| 474 | ret = imr_check_params(base, size); | ||
| 475 | if (ret) | ||
| 476 | return ret; | ||
| 477 | } | ||
| 478 | |||
| 479 | /* Tweak the size value. */ | ||
| 480 | raw_size = imr_raw_size(size); | ||
| 481 | end = base + raw_size; | ||
| 482 | |||
| 483 | mutex_lock(&idev->lock); | ||
| 484 | |||
| 485 | if (reg >= 0) { | ||
| 486 | /* If a specific IMR is given try to use it. */ | ||
| 487 | ret = imr_read(idev, reg, &imr); | ||
| 488 | if (ret) | ||
| 489 | goto failed; | ||
| 490 | |||
| 491 | if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK) { | ||
| 492 | ret = -ENODEV; | ||
| 493 | goto failed; | ||
| 494 | } | ||
| 495 | found = true; | ||
| 496 | } else { | ||
| 497 | /* Search for match based on address range. */ | ||
| 498 | for (i = 0; i < idev->max_imr; i++) { | ||
| 499 | ret = imr_read(idev, i, &imr); | ||
| 500 | if (ret) | ||
| 501 | goto failed; | ||
| 502 | |||
| 503 | if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK) | ||
| 504 | continue; | ||
| 505 | |||
| 506 | if ((imr_to_phys(imr.addr_lo) == base) && | ||
| 507 | (imr_to_phys(imr.addr_hi) == end)) { | ||
| 508 | found = true; | ||
| 509 | reg = i; | ||
| 510 | break; | ||
| 511 | } | ||
| 512 | } | ||
| 513 | } | ||
| 514 | |||
| 515 | if (!found) { | ||
| 516 | ret = -ENODEV; | ||
| 517 | goto failed; | ||
| 518 | } | ||
| 519 | |||
| 520 | pr_debug("remove %d phys %pa-%pa size %zx\n", reg, &base, &end, raw_size); | ||
| 521 | |||
| 522 | /* Tear down the IMR. */ | ||
| 523 | imr.addr_lo = 0; | ||
| 524 | imr.addr_hi = 0; | ||
| 525 | imr.rmask = IMR_READ_ACCESS_ALL; | ||
| 526 | imr.wmask = IMR_WRITE_ACCESS_ALL; | ||
| 527 | |||
| 528 | ret = imr_write(idev, reg, &imr, false); | ||
| 529 | |||
| 530 | failed: | ||
| 531 | mutex_unlock(&idev->lock); | ||
| 532 | return ret; | ||
| 533 | } | ||
| 534 | |||
| 535 | /** | ||
| 536 | * imr_remove_range - delete an Isolated Memory Region by address | ||
| 537 | * | ||
| 538 | * This function allows you to delete an IMR by an address range specified | ||
| 539 | * by base and size respectively. | ||
| 540 | * imr_remove_range(base, size); delete IMR from base to base+size. | ||
| 541 | * | ||
| 542 | * @base: physical base address of region aligned to 1 KiB. | ||
| 543 | * @size: physical size of region in bytes aligned to 1 KiB. | ||
| 544 | * @return: -EINVAL on invalid range or out or range id | ||
| 545 | * -ENODEV if reg is valid but no IMR exists or is locked | ||
| 546 | * 0 on success. | ||
| 547 | */ | ||
| 548 | int imr_remove_range(phys_addr_t base, size_t size) | ||
| 549 | { | ||
| 550 | return __imr_remove_range(-1, base, size); | ||
| 551 | } | ||
| 552 | EXPORT_SYMBOL_GPL(imr_remove_range); | ||
| 553 | |||
| 554 | /** | ||
| 555 | * imr_clear - delete an Isolated Memory Region by index | ||
| 556 | * | ||
| 557 | * This function allows you to delete an IMR by an address range specified | ||
| 558 | * by the index of the IMR. Useful for initial sanitization of the IMR | ||
| 559 | * address map. | ||
| 560 | * imr_ge(base, size); delete IMR from base to base+size. | ||
| 561 | * | ||
| 562 | * @reg: imr index to remove. | ||
| 563 | * @return: -EINVAL on invalid range or out or range id | ||
| 564 | * -ENODEV if reg is valid but no IMR exists or is locked | ||
| 565 | * 0 on success. | ||
| 566 | */ | ||
| 567 | static inline int imr_clear(int reg) | ||
| 568 | { | ||
| 569 | return __imr_remove_range(reg, 0, 0); | ||
| 570 | } | ||
| 571 | |||
| 572 | /** | ||
| 573 | * imr_fixup_memmap - Tear down IMRs used during bootup. | ||
| 574 | * | ||
| 575 | * BIOS and Grub both setup IMRs around compressed kernel, initrd memory | ||
| 576 | * that need to be removed before the kernel hands out one of the IMR | ||
| 577 | * encased addresses to a downstream DMA agent such as the SD or Ethernet. | ||
| 578 | * IMRs on Galileo are setup to immediately reset the system on violation. | ||
| 579 | * As a result if you're running a root filesystem from SD - you'll need | ||
| 580 | * the boot-time IMRs torn down or you'll find seemingly random resets when | ||
| 581 | * using your filesystem. | ||
| 582 | * | ||
| 583 | * @idev: pointer to imr_device structure. | ||
| 584 | * @return: | ||
| 585 | */ | ||
| 586 | static void __init imr_fixup_memmap(struct imr_device *idev) | ||
| 587 | { | ||
| 588 | phys_addr_t base = virt_to_phys(&_text); | ||
| 589 | size_t size = virt_to_phys(&__end_rodata) - base; | ||
| 590 | int i; | ||
| 591 | int ret; | ||
| 592 | |||
| 593 | /* Tear down all existing unlocked IMRs. */ | ||
| 594 | for (i = 0; i < idev->max_imr; i++) | ||
| 595 | imr_clear(i); | ||
| 596 | |||
| 597 | /* | ||
| 598 | * Setup a locked IMR around the physical extent of the kernel | ||
| 599 | * from the beginning of the .text secton to the end of the | ||
| 600 | * .rodata section as one physically contiguous block. | ||
| 601 | */ | ||
| 602 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true); | ||
| 603 | if (ret < 0) { | ||
| 604 | pr_err("unable to setup IMR for kernel: (%p - %p)\n", | ||
| 605 | &_text, &__end_rodata); | ||
| 606 | } else { | ||
| 607 | pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n", | ||
| 608 | size / 1024, &_text, &__end_rodata); | ||
| 609 | } | ||
| 610 | |||
| 611 | } | ||
| 612 | |||
| 613 | static const struct x86_cpu_id imr_ids[] __initconst = { | ||
| 614 | { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */ | ||
| 615 | {} | ||
| 616 | }; | ||
| 617 | MODULE_DEVICE_TABLE(x86cpu, imr_ids); | ||
| 618 | |||
| 619 | /** | ||
| 620 | * imr_init - entry point for IMR driver. | ||
| 621 | * | ||
| 622 | * return: -ENODEV for no IMR support 0 if good to go. | ||
| 623 | */ | ||
| 624 | static int __init imr_init(void) | ||
| 625 | { | ||
| 626 | struct imr_device *idev = &imr_dev; | ||
| 627 | int ret; | ||
| 628 | |||
| 629 | if (!x86_match_cpu(imr_ids) || !iosf_mbi_available()) | ||
| 630 | return -ENODEV; | ||
| 631 | |||
| 632 | idev->max_imr = QUARK_X1000_IMR_MAX; | ||
| 633 | idev->reg_base = QUARK_X1000_IMR_REGBASE; | ||
| 634 | idev->init = true; | ||
| 635 | |||
| 636 | mutex_init(&idev->lock); | ||
| 637 | ret = imr_debugfs_register(idev); | ||
| 638 | if (ret != 0) | ||
| 639 | pr_warn("debugfs register failed!\n"); | ||
| 640 | imr_fixup_memmap(idev); | ||
| 641 | return 0; | ||
| 642 | } | ||
| 643 | |||
| 644 | /** | ||
| 645 | * imr_exit - exit point for IMR code. | ||
| 646 | * | ||
| 647 | * Deregisters debugfs, leave IMR state as-is. | ||
| 648 | * | ||
| 649 | * return: | ||
| 650 | */ | ||
| 651 | static void __exit imr_exit(void) | ||
| 652 | { | ||
| 653 | imr_debugfs_unregister(&imr_dev); | ||
| 654 | } | ||
| 655 | |||
| 656 | module_init(imr_init); | ||
| 657 | module_exit(imr_exit); | ||
| 658 | |||
| 659 | MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>"); | ||
| 660 | MODULE_DESCRIPTION("Intel Isolated Memory Region driver"); | ||
| 661 | MODULE_LICENSE("Dual BSD/GPL"); | ||
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c new file mode 100644 index 000000000000..c9a0838890e2 --- /dev/null +++ b/arch/x86/platform/intel-quark/imr_selftest.c | |||
| @@ -0,0 +1,129 @@ | |||
| 1 | /** | ||
| 2 | * imr_selftest.c | ||
| 3 | * | ||
| 4 | * Copyright(c) 2013 Intel Corporation. | ||
| 5 | * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> | ||
| 6 | * | ||
| 7 | * IMR self test. The purpose of this module is to run a set of tests on the | ||
| 8 | * IMR API to validate it's sanity. We check for overlapping, reserved | ||
| 9 | * addresses and setup/teardown sanity. | ||
| 10 | * | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <asm-generic/sections.h> | ||
| 14 | #include <asm/imr.h> | ||
| 15 | #include <linux/init.h> | ||
| 16 | #include <linux/mm.h> | ||
| 17 | #include <linux/module.h> | ||
| 18 | #include <linux/types.h> | ||
| 19 | |||
| 20 | #define SELFTEST KBUILD_MODNAME ": " | ||
| 21 | /** | ||
| 22 | * imr_self_test_result - Print result string for self test. | ||
| 23 | * | ||
| 24 | * @res: result code - true if test passed false otherwise. | ||
| 25 | * @fmt: format string. | ||
| 26 | * ... variadic argument list. | ||
| 27 | */ | ||
| 28 | static void __init imr_self_test_result(int res, const char *fmt, ...) | ||
| 29 | { | ||
| 30 | va_list vlist; | ||
| 31 | |||
| 32 | /* Print pass/fail. */ | ||
| 33 | if (res) | ||
| 34 | pr_info(SELFTEST "pass "); | ||
| 35 | else | ||
| 36 | pr_info(SELFTEST "fail "); | ||
| 37 | |||
| 38 | /* Print variable string. */ | ||
| 39 | va_start(vlist, fmt); | ||
| 40 | vprintk(fmt, vlist); | ||
| 41 | va_end(vlist); | ||
| 42 | |||
| 43 | /* Optional warning. */ | ||
| 44 | WARN(res == 0, "test failed"); | ||
| 45 | } | ||
| 46 | #undef SELFTEST | ||
| 47 | |||
| 48 | /** | ||
| 49 | * imr_self_test | ||
| 50 | * | ||
| 51 | * Verify IMR self_test with some simple tests to verify overlap, | ||
| 52 | * zero sized allocations and 1 KiB sized areas. | ||
| 53 | * | ||
| 54 | */ | ||
| 55 | static void __init imr_self_test(void) | ||
| 56 | { | ||
| 57 | phys_addr_t base = virt_to_phys(&_text); | ||
| 58 | size_t size = virt_to_phys(&__end_rodata) - base; | ||
| 59 | const char *fmt_over = "overlapped IMR @ (0x%08lx - 0x%08lx)\n"; | ||
| 60 | int ret; | ||
| 61 | |||
| 62 | /* Test zero zero. */ | ||
| 63 | ret = imr_add_range(0, 0, 0, 0, false); | ||
| 64 | imr_self_test_result(ret < 0, "zero sized IMR\n"); | ||
| 65 | |||
| 66 | /* Test exact overlap. */ | ||
| 67 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false); | ||
| 68 | imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); | ||
| 69 | |||
| 70 | /* Test overlap with base inside of existing. */ | ||
| 71 | base += size - IMR_ALIGN; | ||
| 72 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false); | ||
| 73 | imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); | ||
| 74 | |||
| 75 | /* Test overlap with end inside of existing. */ | ||
| 76 | base -= size + IMR_ALIGN * 2; | ||
| 77 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false); | ||
| 78 | imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); | ||
| 79 | |||
| 80 | /* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */ | ||
| 81 | ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL, | ||
| 82 | IMR_WRITE_ACCESS_ALL, false); | ||
| 83 | imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n"); | ||
| 84 | |||
| 85 | /* Test that a 1 KiB IMR @ zero with CPU only will work. */ | ||
| 86 | ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false); | ||
| 87 | imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n"); | ||
| 88 | if (ret >= 0) { | ||
| 89 | ret = imr_remove_range(0, IMR_ALIGN); | ||
| 90 | imr_self_test_result(ret == 0, "teardown - cpu-access\n"); | ||
| 91 | } | ||
| 92 | |||
| 93 | /* Test 2 KiB works. */ | ||
| 94 | size = IMR_ALIGN * 2; | ||
| 95 | ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, | ||
| 96 | IMR_WRITE_ACCESS_ALL, false); | ||
| 97 | imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n"); | ||
| 98 | if (ret >= 0) { | ||
| 99 | ret = imr_remove_range(0, size); | ||
| 100 | imr_self_test_result(ret == 0, "teardown 2KiB\n"); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | /** | ||
| 105 | * imr_self_test_init - entry point for IMR driver. | ||
| 106 | * | ||
| 107 | * return: -ENODEV for no IMR support 0 if good to go. | ||
| 108 | */ | ||
| 109 | static int __init imr_self_test_init(void) | ||
| 110 | { | ||
| 111 | imr_self_test(); | ||
| 112 | return 0; | ||
| 113 | } | ||
| 114 | |||
| 115 | /** | ||
| 116 | * imr_self_test_exit - exit point for IMR code. | ||
| 117 | * | ||
| 118 | * return: | ||
| 119 | */ | ||
| 120 | static void __exit imr_self_test_exit(void) | ||
| 121 | { | ||
| 122 | } | ||
| 123 | |||
| 124 | module_init(imr_self_test_init); | ||
| 125 | module_exit(imr_self_test_exit); | ||
| 126 | |||
| 127 | MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>"); | ||
| 128 | MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver"); | ||
| 129 | MODULE_LICENSE("Dual BSD/GPL"); | ||
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 23b45eb9a89c..956374c1edbc 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
| @@ -41,7 +41,7 @@ static u8 zero_stats; | |||
| 41 | static inline void check_zero(void) | 41 | static inline void check_zero(void) |
| 42 | { | 42 | { |
| 43 | u8 ret; | 43 | u8 ret; |
| 44 | u8 old = ACCESS_ONCE(zero_stats); | 44 | u8 old = READ_ONCE(zero_stats); |
| 45 | if (unlikely(old)) { | 45 | if (unlikely(old)) { |
| 46 | ret = cmpxchg(&zero_stats, old, 0); | 46 | ret = cmpxchg(&zero_stats, old, 0); |
| 47 | /* This ensures only one fellow resets the stat */ | 47 | /* This ensures only one fellow resets the stat */ |
| @@ -112,6 +112,7 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | |||
| 112 | struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting); | 112 | struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting); |
| 113 | int cpu = smp_processor_id(); | 113 | int cpu = smp_processor_id(); |
| 114 | u64 start; | 114 | u64 start; |
| 115 | __ticket_t head; | ||
| 115 | unsigned long flags; | 116 | unsigned long flags; |
| 116 | 117 | ||
| 117 | /* If kicker interrupts not initialized yet, just spin */ | 118 | /* If kicker interrupts not initialized yet, just spin */ |
| @@ -159,11 +160,15 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | |||
| 159 | */ | 160 | */ |
| 160 | __ticket_enter_slowpath(lock); | 161 | __ticket_enter_slowpath(lock); |
| 161 | 162 | ||
| 163 | /* make sure enter_slowpath, which is atomic does not cross the read */ | ||
| 164 | smp_mb__after_atomic(); | ||
| 165 | |||
| 162 | /* | 166 | /* |
| 163 | * check again make sure it didn't become free while | 167 | * check again make sure it didn't become free while |
| 164 | * we weren't looking | 168 | * we weren't looking |
| 165 | */ | 169 | */ |
| 166 | if (ACCESS_ONCE(lock->tickets.head) == want) { | 170 | head = READ_ONCE(lock->tickets.head); |
| 171 | if (__tickets_equal(head, want)) { | ||
| 167 | add_stats(TAKEN_SLOW_PICKUP, 1); | 172 | add_stats(TAKEN_SLOW_PICKUP, 1); |
| 168 | goto out; | 173 | goto out; |
| 169 | } | 174 | } |
| @@ -204,8 +209,8 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) | |||
| 204 | const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); | 209 | const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); |
| 205 | 210 | ||
| 206 | /* Make sure we read lock before want */ | 211 | /* Make sure we read lock before want */ |
| 207 | if (ACCESS_ONCE(w->lock) == lock && | 212 | if (READ_ONCE(w->lock) == lock && |
| 208 | ACCESS_ONCE(w->want) == next) { | 213 | READ_ONCE(w->want) == next) { |
| 209 | add_stats(RELEASED_SLOW_KICKED, 1); | 214 | add_stats(RELEASED_SLOW_KICKED, 1); |
| 210 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | 215 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); |
| 211 | break; | 216 | break; |
diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c index ec318bf434a6..1786574536b2 100644 --- a/drivers/char/ipmi/ipmi_devintf.c +++ b/drivers/char/ipmi/ipmi_devintf.c | |||
| @@ -157,12 +157,16 @@ static int ipmi_release(struct inode *inode, struct file *file) | |||
| 157 | { | 157 | { |
| 158 | struct ipmi_file_private *priv = file->private_data; | 158 | struct ipmi_file_private *priv = file->private_data; |
| 159 | int rv; | 159 | int rv; |
| 160 | struct ipmi_recv_msg *msg, *next; | ||
| 160 | 161 | ||
| 161 | rv = ipmi_destroy_user(priv->user); | 162 | rv = ipmi_destroy_user(priv->user); |
| 162 | if (rv) | 163 | if (rv) |
| 163 | return rv; | 164 | return rv; |
| 164 | 165 | ||
| 165 | /* FIXME - free the messages in the list. */ | 166 | list_for_each_entry_safe(msg, next, &priv->recv_msgs, link) |
| 167 | ipmi_free_recv_msg(msg); | ||
| 168 | |||
| 169 | |||
| 166 | kfree(priv); | 170 | kfree(priv); |
| 167 | 171 | ||
| 168 | return 0; | 172 | return 0; |
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 6b65fa4e0c55..9bb592872532 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c | |||
| @@ -1483,14 +1483,10 @@ static inline void format_lan_msg(struct ipmi_smi_msg *smi_msg, | |||
| 1483 | smi_msg->msgid = msgid; | 1483 | smi_msg->msgid = msgid; |
| 1484 | } | 1484 | } |
| 1485 | 1485 | ||
| 1486 | static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers, | 1486 | static struct ipmi_smi_msg *smi_add_send_msg(ipmi_smi_t intf, |
| 1487 | struct ipmi_smi_msg *smi_msg, int priority) | 1487 | struct ipmi_smi_msg *smi_msg, |
| 1488 | int priority) | ||
| 1488 | { | 1489 | { |
| 1489 | int run_to_completion = intf->run_to_completion; | ||
| 1490 | unsigned long flags; | ||
| 1491 | |||
| 1492 | if (!run_to_completion) | ||
| 1493 | spin_lock_irqsave(&intf->xmit_msgs_lock, flags); | ||
| 1494 | if (intf->curr_msg) { | 1490 | if (intf->curr_msg) { |
| 1495 | if (priority > 0) | 1491 | if (priority > 0) |
| 1496 | list_add_tail(&smi_msg->link, &intf->hp_xmit_msgs); | 1492 | list_add_tail(&smi_msg->link, &intf->hp_xmit_msgs); |
| @@ -1500,8 +1496,25 @@ static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers, | |||
| 1500 | } else { | 1496 | } else { |
| 1501 | intf->curr_msg = smi_msg; | 1497 | intf->curr_msg = smi_msg; |
| 1502 | } | 1498 | } |
| 1503 | if (!run_to_completion) | 1499 | |
| 1500 | return smi_msg; | ||
| 1501 | } | ||
| 1502 | |||
| 1503 | |||
| 1504 | static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers, | ||
| 1505 | struct ipmi_smi_msg *smi_msg, int priority) | ||
| 1506 | { | ||
| 1507 | int run_to_completion = intf->run_to_completion; | ||
| 1508 | |||
| 1509 | if (run_to_completion) { | ||
| 1510 | smi_msg = smi_add_send_msg(intf, smi_msg, priority); | ||
| 1511 | } else { | ||
| 1512 | unsigned long flags; | ||
| 1513 | |||
| 1514 | spin_lock_irqsave(&intf->xmit_msgs_lock, flags); | ||
| 1515 | smi_msg = smi_add_send_msg(intf, smi_msg, priority); | ||
| 1504 | spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); | 1516 | spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); |
| 1517 | } | ||
| 1505 | 1518 | ||
| 1506 | if (smi_msg) | 1519 | if (smi_msg) |
| 1507 | handlers->sender(intf->send_info, smi_msg); | 1520 | handlers->sender(intf->send_info, smi_msg); |
| @@ -1985,7 +1998,9 @@ static int smi_ipmb_proc_show(struct seq_file *m, void *v) | |||
| 1985 | seq_printf(m, "%x", intf->channels[0].address); | 1998 | seq_printf(m, "%x", intf->channels[0].address); |
| 1986 | for (i = 1; i < IPMI_MAX_CHANNELS; i++) | 1999 | for (i = 1; i < IPMI_MAX_CHANNELS; i++) |
| 1987 | seq_printf(m, " %x", intf->channels[i].address); | 2000 | seq_printf(m, " %x", intf->channels[i].address); |
| 1988 | return seq_putc(m, '\n'); | 2001 | seq_putc(m, '\n'); |
| 2002 | |||
| 2003 | return seq_has_overflowed(m); | ||
| 1989 | } | 2004 | } |
| 1990 | 2005 | ||
| 1991 | static int smi_ipmb_proc_open(struct inode *inode, struct file *file) | 2006 | static int smi_ipmb_proc_open(struct inode *inode, struct file *file) |
| @@ -2004,9 +2019,11 @@ static int smi_version_proc_show(struct seq_file *m, void *v) | |||
| 2004 | { | 2019 | { |
| 2005 | ipmi_smi_t intf = m->private; | 2020 | ipmi_smi_t intf = m->private; |
| 2006 | 2021 | ||
| 2007 | return seq_printf(m, "%u.%u\n", | 2022 | seq_printf(m, "%u.%u\n", |
| 2008 | ipmi_version_major(&intf->bmc->id), | 2023 | ipmi_version_major(&intf->bmc->id), |
| 2009 | ipmi_version_minor(&intf->bmc->id)); | 2024 | ipmi_version_minor(&intf->bmc->id)); |
| 2025 | |||
| 2026 | return seq_has_overflowed(m); | ||
| 2010 | } | 2027 | } |
| 2011 | 2028 | ||
| 2012 | static int smi_version_proc_open(struct inode *inode, struct file *file) | 2029 | static int smi_version_proc_open(struct inode *inode, struct file *file) |
| @@ -2353,11 +2370,28 @@ static struct attribute *bmc_dev_attrs[] = { | |||
| 2353 | &dev_attr_additional_device_support.attr, | 2370 | &dev_attr_additional_device_support.attr, |
| 2354 | &dev_attr_manufacturer_id.attr, | 2371 | &dev_attr_manufacturer_id.attr, |
| 2355 | &dev_attr_product_id.attr, | 2372 | &dev_attr_product_id.attr, |
| 2373 | &dev_attr_aux_firmware_revision.attr, | ||
| 2374 | &dev_attr_guid.attr, | ||
| 2356 | NULL | 2375 | NULL |
| 2357 | }; | 2376 | }; |
| 2358 | 2377 | ||
| 2378 | static umode_t bmc_dev_attr_is_visible(struct kobject *kobj, | ||
| 2379 | struct attribute *attr, int idx) | ||
| 2380 | { | ||
| 2381 | struct device *dev = kobj_to_dev(kobj); | ||
| 2382 | struct bmc_device *bmc = to_bmc_device(dev); | ||
| 2383 | umode_t mode = attr->mode; | ||
| 2384 | |||
| 2385 | if (attr == &dev_attr_aux_firmware_revision.attr) | ||
| 2386 | return bmc->id.aux_firmware_revision_set ? mode : 0; | ||
| 2387 | if (attr == &dev_attr_guid.attr) | ||
| 2388 | return bmc->guid_set ? mode : 0; | ||
| 2389 | return mode; | ||
| 2390 | } | ||
| 2391 | |||
| 2359 | static struct attribute_group bmc_dev_attr_group = { | 2392 | static struct attribute_group bmc_dev_attr_group = { |
| 2360 | .attrs = bmc_dev_attrs, | 2393 | .attrs = bmc_dev_attrs, |
| 2394 | .is_visible = bmc_dev_attr_is_visible, | ||
| 2361 | }; | 2395 | }; |
| 2362 | 2396 | ||
| 2363 | static const struct attribute_group *bmc_dev_attr_groups[] = { | 2397 | static const struct attribute_group *bmc_dev_attr_groups[] = { |
| @@ -2380,13 +2414,6 @@ cleanup_bmc_device(struct kref *ref) | |||
| 2380 | { | 2414 | { |
| 2381 | struct bmc_device *bmc = container_of(ref, struct bmc_device, usecount); | 2415 | struct bmc_device *bmc = container_of(ref, struct bmc_device, usecount); |
| 2382 | 2416 | ||
| 2383 | if (bmc->id.aux_firmware_revision_set) | ||
| 2384 | device_remove_file(&bmc->pdev.dev, | ||
| 2385 | &dev_attr_aux_firmware_revision); | ||
| 2386 | if (bmc->guid_set) | ||
| 2387 | device_remove_file(&bmc->pdev.dev, | ||
| 2388 | &dev_attr_guid); | ||
| 2389 | |||
| 2390 | platform_device_unregister(&bmc->pdev); | 2417 | platform_device_unregister(&bmc->pdev); |
| 2391 | } | 2418 | } |
| 2392 | 2419 | ||
| @@ -2407,33 +2434,6 @@ static void ipmi_bmc_unregister(ipmi_smi_t intf) | |||
| 2407 | mutex_unlock(&ipmidriver_mutex); | 2434 | mutex_unlock(&ipmidriver_mutex); |
| 2408 | } | 2435 | } |
| 2409 | 2436 | ||
| 2410 | static int create_bmc_files(struct bmc_device *bmc) | ||
| 2411 | { | ||
| 2412 | int err; | ||
| 2413 | |||
| 2414 | if (bmc->id.aux_firmware_revision_set) { | ||
| 2415 | err = device_create_file(&bmc->pdev.dev, | ||
| 2416 | &dev_attr_aux_firmware_revision); | ||
| 2417 | if (err) | ||
| 2418 | goto out; | ||
| 2419 | } | ||
| 2420 | if (bmc->guid_set) { | ||
| 2421 | err = device_create_file(&bmc->pdev.dev, | ||
| 2422 | &dev_attr_guid); | ||
| 2423 | if (err) | ||
| 2424 | goto out_aux_firm; | ||
| 2425 | } | ||
| 2426 | |||
| 2427 | return 0; | ||
| 2428 | |||
| 2429 | out_aux_firm: | ||
| 2430 | if (bmc->id.aux_firmware_revision_set) | ||
| 2431 | device_remove_file(&bmc->pdev.dev, | ||
| 2432 | &dev_attr_aux_firmware_revision); | ||
| 2433 | out: | ||
| 2434 | return err; | ||
| 2435 | } | ||
| 2436 | |||
| 2437 | static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum) | 2437 | static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum) |
| 2438 | { | 2438 | { |
| 2439 | int rv; | 2439 | int rv; |
| @@ -2522,15 +2522,6 @@ static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum) | |||
| 2522 | return rv; | 2522 | return rv; |
| 2523 | } | 2523 | } |
| 2524 | 2524 | ||
| 2525 | rv = create_bmc_files(bmc); | ||
| 2526 | if (rv) { | ||
| 2527 | mutex_lock(&ipmidriver_mutex); | ||
| 2528 | platform_device_unregister(&bmc->pdev); | ||
| 2529 | mutex_unlock(&ipmidriver_mutex); | ||
| 2530 | |||
| 2531 | return rv; | ||
| 2532 | } | ||
| 2533 | |||
| 2534 | dev_info(intf->si_dev, "Found new BMC (man_id: 0x%6.6x, " | 2525 | dev_info(intf->si_dev, "Found new BMC (man_id: 0x%6.6x, " |
| 2535 | "prod_id: 0x%4.4x, dev_id: 0x%2.2x)\n", | 2526 | "prod_id: 0x%4.4x, dev_id: 0x%2.2x)\n", |
| 2536 | bmc->id.manufacturer_id, | 2527 | bmc->id.manufacturer_id, |
| @@ -4212,7 +4203,6 @@ static void need_waiter(ipmi_smi_t intf) | |||
| 4212 | static atomic_t smi_msg_inuse_count = ATOMIC_INIT(0); | 4203 | static atomic_t smi_msg_inuse_count = ATOMIC_INIT(0); |
| 4213 | static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0); | 4204 | static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0); |
| 4214 | 4205 | ||
| 4215 | /* FIXME - convert these to slabs. */ | ||
| 4216 | static void free_smi_msg(struct ipmi_smi_msg *msg) | 4206 | static void free_smi_msg(struct ipmi_smi_msg *msg) |
| 4217 | { | 4207 | { |
| 4218 | atomic_dec(&smi_msg_inuse_count); | 4208 | atomic_dec(&smi_msg_inuse_count); |
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 967b73aa4e66..f6646ed3047e 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c | |||
| @@ -321,6 +321,18 @@ static int try_smi_init(struct smi_info *smi); | |||
| 321 | static void cleanup_one_si(struct smi_info *to_clean); | 321 | static void cleanup_one_si(struct smi_info *to_clean); |
| 322 | static void cleanup_ipmi_si(void); | 322 | static void cleanup_ipmi_si(void); |
| 323 | 323 | ||
| 324 | #ifdef DEBUG_TIMING | ||
| 325 | void debug_timestamp(char *msg) | ||
| 326 | { | ||
| 327 | struct timespec64 t; | ||
| 328 | |||
| 329 | getnstimeofday64(&t); | ||
| 330 | pr_debug("**%s: %lld.%9.9ld\n", msg, (long long) t.tv_sec, t.tv_nsec); | ||
| 331 | } | ||
| 332 | #else | ||
| 333 | #define debug_timestamp(x) | ||
| 334 | #endif | ||
| 335 | |||
| 324 | static ATOMIC_NOTIFIER_HEAD(xaction_notifier_list); | 336 | static ATOMIC_NOTIFIER_HEAD(xaction_notifier_list); |
| 325 | static int register_xaction_notifier(struct notifier_block *nb) | 337 | static int register_xaction_notifier(struct notifier_block *nb) |
| 326 | { | 338 | { |
| @@ -358,9 +370,6 @@ static void return_hosed_msg(struct smi_info *smi_info, int cCode) | |||
| 358 | static enum si_sm_result start_next_msg(struct smi_info *smi_info) | 370 | static enum si_sm_result start_next_msg(struct smi_info *smi_info) |
| 359 | { | 371 | { |
| 360 | int rv; | 372 | int rv; |
| 361 | #ifdef DEBUG_TIMING | ||
| 362 | struct timeval t; | ||
| 363 | #endif | ||
| 364 | 373 | ||
| 365 | if (!smi_info->waiting_msg) { | 374 | if (!smi_info->waiting_msg) { |
| 366 | smi_info->curr_msg = NULL; | 375 | smi_info->curr_msg = NULL; |
| @@ -370,10 +379,7 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) | |||
| 370 | 379 | ||
| 371 | smi_info->curr_msg = smi_info->waiting_msg; | 380 | smi_info->curr_msg = smi_info->waiting_msg; |
| 372 | smi_info->waiting_msg = NULL; | 381 | smi_info->waiting_msg = NULL; |
| 373 | #ifdef DEBUG_TIMING | 382 | debug_timestamp("Start2"); |
| 374 | do_gettimeofday(&t); | ||
| 375 | printk(KERN_DEBUG "**Start2: %d.%9.9d\n", t.tv_sec, t.tv_usec); | ||
| 376 | #endif | ||
| 377 | err = atomic_notifier_call_chain(&xaction_notifier_list, | 383 | err = atomic_notifier_call_chain(&xaction_notifier_list, |
| 378 | 0, smi_info); | 384 | 0, smi_info); |
| 379 | if (err & NOTIFY_STOP_MASK) { | 385 | if (err & NOTIFY_STOP_MASK) { |
| @@ -582,12 +588,8 @@ static void check_bt_irq(struct smi_info *smi_info, bool irq_on) | |||
| 582 | static void handle_transaction_done(struct smi_info *smi_info) | 588 | static void handle_transaction_done(struct smi_info *smi_info) |
| 583 | { | 589 | { |
| 584 | struct ipmi_smi_msg *msg; | 590 | struct ipmi_smi_msg *msg; |
| 585 | #ifdef DEBUG_TIMING | ||
| 586 | struct timeval t; | ||
| 587 | 591 | ||
| 588 | do_gettimeofday(&t); | 592 | debug_timestamp("Done"); |
| 589 | printk(KERN_DEBUG "**Done: %d.%9.9d\n", t.tv_sec, t.tv_usec); | ||
| 590 | #endif | ||
| 591 | switch (smi_info->si_state) { | 593 | switch (smi_info->si_state) { |
| 592 | case SI_NORMAL: | 594 | case SI_NORMAL: |
| 593 | if (!smi_info->curr_msg) | 595 | if (!smi_info->curr_msg) |
| @@ -929,24 +931,15 @@ static void sender(void *send_info, | |||
| 929 | struct smi_info *smi_info = send_info; | 931 | struct smi_info *smi_info = send_info; |
| 930 | enum si_sm_result result; | 932 | enum si_sm_result result; |
| 931 | unsigned long flags; | 933 | unsigned long flags; |
| 932 | #ifdef DEBUG_TIMING | ||
| 933 | struct timeval t; | ||
| 934 | #endif | ||
| 935 | |||
| 936 | BUG_ON(smi_info->waiting_msg); | ||
| 937 | smi_info->waiting_msg = msg; | ||
| 938 | 934 | ||
| 939 | #ifdef DEBUG_TIMING | 935 | debug_timestamp("Enqueue"); |
| 940 | do_gettimeofday(&t); | ||
| 941 | printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec); | ||
| 942 | #endif | ||
| 943 | 936 | ||
| 944 | if (smi_info->run_to_completion) { | 937 | if (smi_info->run_to_completion) { |
| 945 | /* | 938 | /* |
| 946 | * If we are running to completion, start it and run | 939 | * If we are running to completion, start it and run |
| 947 | * transactions until everything is clear. | 940 | * transactions until everything is clear. |
| 948 | */ | 941 | */ |
| 949 | smi_info->curr_msg = smi_info->waiting_msg; | 942 | smi_info->curr_msg = msg; |
| 950 | smi_info->waiting_msg = NULL; | 943 | smi_info->waiting_msg = NULL; |
| 951 | 944 | ||
| 952 | /* | 945 | /* |
| @@ -964,6 +957,15 @@ static void sender(void *send_info, | |||
| 964 | } | 957 | } |
| 965 | 958 | ||
| 966 | spin_lock_irqsave(&smi_info->si_lock, flags); | 959 | spin_lock_irqsave(&smi_info->si_lock, flags); |
| 960 | /* | ||
| 961 | * The following two lines don't need to be under the lock for | ||
| 962 | * the lock's sake, but they do need SMP memory barriers to | ||
| 963 | * avoid getting things out of order. We are already claiming | ||
| 964 | * the lock, anyway, so just do it under the lock to avoid the | ||
| 965 | * ordering problem. | ||
| 966 | */ | ||
| 967 | BUG_ON(smi_info->waiting_msg); | ||
| 968 | smi_info->waiting_msg = msg; | ||
| 967 | check_start_timer_thread(smi_info); | 969 | check_start_timer_thread(smi_info); |
| 968 | spin_unlock_irqrestore(&smi_info->si_lock, flags); | 970 | spin_unlock_irqrestore(&smi_info->si_lock, flags); |
| 969 | } | 971 | } |
| @@ -989,18 +991,18 @@ static void set_run_to_completion(void *send_info, bool i_run_to_completion) | |||
| 989 | * we are spinning in kipmid looking for something and not delaying | 991 | * we are spinning in kipmid looking for something and not delaying |
| 990 | * between checks | 992 | * between checks |
| 991 | */ | 993 | */ |
| 992 | static inline void ipmi_si_set_not_busy(struct timespec *ts) | 994 | static inline void ipmi_si_set_not_busy(struct timespec64 *ts) |
| 993 | { | 995 | { |
| 994 | ts->tv_nsec = -1; | 996 | ts->tv_nsec = -1; |
| 995 | } | 997 | } |
| 996 | static inline int ipmi_si_is_busy(struct timespec *ts) | 998 | static inline int ipmi_si_is_busy(struct timespec64 *ts) |
| 997 | { | 999 | { |
| 998 | return ts->tv_nsec != -1; | 1000 | return ts->tv_nsec != -1; |
| 999 | } | 1001 | } |
| 1000 | 1002 | ||
| 1001 | static inline int ipmi_thread_busy_wait(enum si_sm_result smi_result, | 1003 | static inline int ipmi_thread_busy_wait(enum si_sm_result smi_result, |
| 1002 | const struct smi_info *smi_info, | 1004 | const struct smi_info *smi_info, |
| 1003 | struct timespec *busy_until) | 1005 | struct timespec64 *busy_until) |
| 1004 | { | 1006 | { |
| 1005 | unsigned int max_busy_us = 0; | 1007 | unsigned int max_busy_us = 0; |
| 1006 | 1008 | ||
| @@ -1009,12 +1011,13 @@ static inline int ipmi_thread_busy_wait(enum si_sm_result smi_result, | |||
| 1009 | if (max_busy_us == 0 || smi_result != SI_SM_CALL_WITH_DELAY) | 1011 | if (max_busy_us == 0 || smi_result != SI_SM_CALL_WITH_DELAY) |
| 1010 | ipmi_si_set_not_busy(busy_until); | 1012 | ipmi_si_set_not_busy(busy_until); |
| 1011 | else if (!ipmi_si_is_busy(busy_until)) { | 1013 | else if (!ipmi_si_is_busy(busy_until)) { |
| 1012 | getnstimeofday(busy_until); | 1014 | getnstimeofday64(busy_until); |
| 1013 | timespec_add_ns(busy_until, max_busy_us*NSEC_PER_USEC); | 1015 | timespec64_add_ns(busy_until, max_busy_us*NSEC_PER_USEC); |
| 1014 | } else { | 1016 | } else { |
| 1015 | struct timespec now; | 1017 | struct timespec64 now; |
| 1016 | getnstimeofday(&now); | 1018 | |
| 1017 | if (unlikely(timespec_compare(&now, busy_until) > 0)) { | 1019 | getnstimeofday64(&now); |
| 1020 | if (unlikely(timespec64_compare(&now, busy_until) > 0)) { | ||
| 1018 | ipmi_si_set_not_busy(busy_until); | 1021 | ipmi_si_set_not_busy(busy_until); |
| 1019 | return 0; | 1022 | return 0; |
| 1020 | } | 1023 | } |
| @@ -1037,7 +1040,7 @@ static int ipmi_thread(void *data) | |||
| 1037 | struct smi_info *smi_info = data; | 1040 | struct smi_info *smi_info = data; |
| 1038 | unsigned long flags; | 1041 | unsigned long flags; |
| 1039 | enum si_sm_result smi_result; | 1042 | enum si_sm_result smi_result; |
| 1040 | struct timespec busy_until; | 1043 | struct timespec64 busy_until; |
| 1041 | 1044 | ||
| 1042 | ipmi_si_set_not_busy(&busy_until); | 1045 | ipmi_si_set_not_busy(&busy_until); |
| 1043 | set_user_nice(current, MAX_NICE); | 1046 | set_user_nice(current, MAX_NICE); |
| @@ -1128,15 +1131,10 @@ static void smi_timeout(unsigned long data) | |||
| 1128 | unsigned long jiffies_now; | 1131 | unsigned long jiffies_now; |
| 1129 | long time_diff; | 1132 | long time_diff; |
| 1130 | long timeout; | 1133 | long timeout; |
| 1131 | #ifdef DEBUG_TIMING | ||
| 1132 | struct timeval t; | ||
| 1133 | #endif | ||
| 1134 | 1134 | ||
| 1135 | spin_lock_irqsave(&(smi_info->si_lock), flags); | 1135 | spin_lock_irqsave(&(smi_info->si_lock), flags); |
| 1136 | #ifdef DEBUG_TIMING | 1136 | debug_timestamp("Timer"); |
| 1137 | do_gettimeofday(&t); | 1137 | |
| 1138 | printk(KERN_DEBUG "**Timer: %d.%9.9d\n", t.tv_sec, t.tv_usec); | ||
| 1139 | #endif | ||
| 1140 | jiffies_now = jiffies; | 1138 | jiffies_now = jiffies; |
| 1141 | time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies) | 1139 | time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies) |
| 1142 | * SI_USEC_PER_JIFFY); | 1140 | * SI_USEC_PER_JIFFY); |
| @@ -1173,18 +1171,13 @@ static irqreturn_t si_irq_handler(int irq, void *data) | |||
| 1173 | { | 1171 | { |
| 1174 | struct smi_info *smi_info = data; | 1172 | struct smi_info *smi_info = data; |
| 1175 | unsigned long flags; | 1173 | unsigned long flags; |
| 1176 | #ifdef DEBUG_TIMING | ||
| 1177 | struct timeval t; | ||
| 1178 | #endif | ||
| 1179 | 1174 | ||
| 1180 | spin_lock_irqsave(&(smi_info->si_lock), flags); | 1175 | spin_lock_irqsave(&(smi_info->si_lock), flags); |
| 1181 | 1176 | ||
| 1182 | smi_inc_stat(smi_info, interrupts); | 1177 | smi_inc_stat(smi_info, interrupts); |
| 1183 | 1178 | ||
| 1184 | #ifdef DEBUG_TIMING | 1179 | debug_timestamp("Interrupt"); |
| 1185 | do_gettimeofday(&t); | 1180 | |
| 1186 | printk(KERN_DEBUG "**Interrupt: %d.%9.9d\n", t.tv_sec, t.tv_usec); | ||
| 1187 | #endif | ||
| 1188 | smi_event_handler(smi_info, 0); | 1181 | smi_event_handler(smi_info, 0); |
| 1189 | spin_unlock_irqrestore(&(smi_info->si_lock), flags); | 1182 | spin_unlock_irqrestore(&(smi_info->si_lock), flags); |
| 1190 | return IRQ_HANDLED; | 1183 | return IRQ_HANDLED; |
| @@ -2038,18 +2031,13 @@ static u32 ipmi_acpi_gpe(acpi_handle gpe_device, | |||
| 2038 | { | 2031 | { |
| 2039 | struct smi_info *smi_info = context; | 2032 | struct smi_info *smi_info = context; |
| 2040 | unsigned long flags; | 2033 | unsigned long flags; |
| 2041 | #ifdef DEBUG_TIMING | ||
| 2042 | struct timeval t; | ||
| 2043 | #endif | ||
| 2044 | 2034 | ||
| 2045 | spin_lock_irqsave(&(smi_info->si_lock), flags); | 2035 | spin_lock_irqsave(&(smi_info->si_lock), flags); |
| 2046 | 2036 | ||
| 2047 | smi_inc_stat(smi_info, interrupts); | 2037 | smi_inc_stat(smi_info, interrupts); |
| 2048 | 2038 | ||
| 2049 | #ifdef DEBUG_TIMING | 2039 | debug_timestamp("ACPI_GPE"); |
| 2050 | do_gettimeofday(&t); | 2040 | |
| 2051 | printk("**ACPI_GPE: %d.%9.9d\n", t.tv_sec, t.tv_usec); | ||
| 2052 | #endif | ||
| 2053 | smi_event_handler(smi_info, 0); | 2041 | smi_event_handler(smi_info, 0); |
| 2054 | spin_unlock_irqrestore(&(smi_info->si_lock), flags); | 2042 | spin_unlock_irqrestore(&(smi_info->si_lock), flags); |
| 2055 | 2043 | ||
| @@ -2071,7 +2059,6 @@ static int acpi_gpe_irq_setup(struct smi_info *info) | |||
| 2071 | if (!info->irq) | 2059 | if (!info->irq) |
| 2072 | return 0; | 2060 | return 0; |
| 2073 | 2061 | ||
| 2074 | /* FIXME - is level triggered right? */ | ||
| 2075 | status = acpi_install_gpe_handler(NULL, | 2062 | status = acpi_install_gpe_handler(NULL, |
| 2076 | info->irq, | 2063 | info->irq, |
| 2077 | ACPI_GPE_LEVEL_TRIGGERED, | 2064 | ACPI_GPE_LEVEL_TRIGGERED, |
| @@ -2998,7 +2985,9 @@ static int smi_type_proc_show(struct seq_file *m, void *v) | |||
| 2998 | { | 2985 | { |
| 2999 | struct smi_info *smi = m->private; | 2986 | struct smi_info *smi = m->private; |
| 3000 | 2987 | ||
| 3001 | return seq_printf(m, "%s\n", si_to_str[smi->si_type]); | 2988 | seq_printf(m, "%s\n", si_to_str[smi->si_type]); |
| 2989 | |||
| 2990 | return seq_has_overflowed(m); | ||
| 3002 | } | 2991 | } |
| 3003 | 2992 | ||
| 3004 | static int smi_type_proc_open(struct inode *inode, struct file *file) | 2993 | static int smi_type_proc_open(struct inode *inode, struct file *file) |
| @@ -3060,16 +3049,18 @@ static int smi_params_proc_show(struct seq_file *m, void *v) | |||
| 3060 | { | 3049 | { |
| 3061 | struct smi_info *smi = m->private; | 3050 | struct smi_info *smi = m->private; |
| 3062 | 3051 | ||
| 3063 | return seq_printf(m, | 3052 | seq_printf(m, |
| 3064 | "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n", | 3053 | "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n", |
| 3065 | si_to_str[smi->si_type], | 3054 | si_to_str[smi->si_type], |
| 3066 | addr_space_to_str[smi->io.addr_type], | 3055 | addr_space_to_str[smi->io.addr_type], |
| 3067 | smi->io.addr_data, | 3056 | smi->io.addr_data, |
| 3068 | smi->io.regspacing, | 3057 | smi->io.regspacing, |
| 3069 | smi->io.regsize, | 3058 | smi->io.regsize, |
| 3070 | smi->io.regshift, | 3059 | smi->io.regshift, |
| 3071 | smi->irq, | 3060 | smi->irq, |
| 3072 | smi->slave_addr); | 3061 | smi->slave_addr); |
| 3062 | |||
| 3063 | return seq_has_overflowed(m); | ||
| 3073 | } | 3064 | } |
| 3074 | 3065 | ||
| 3075 | static int smi_params_proc_open(struct inode *inode, struct file *file) | 3066 | static int smi_params_proc_open(struct inode *inode, struct file *file) |
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 982b96323f82..f6e378dac5f5 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c | |||
| @@ -1097,8 +1097,6 @@ static int ssif_remove(struct i2c_client *client) | |||
| 1097 | if (!ssif_info) | 1097 | if (!ssif_info) |
| 1098 | return 0; | 1098 | return 0; |
| 1099 | 1099 | ||
| 1100 | i2c_set_clientdata(client, NULL); | ||
| 1101 | |||
| 1102 | /* | 1100 | /* |
| 1103 | * After this point, we won't deliver anything asychronously | 1101 | * After this point, we won't deliver anything asychronously |
| 1104 | * to the message handler. We can unregister ourself. | 1102 | * to the message handler. We can unregister ourself. |
| @@ -1198,7 +1196,9 @@ static int ssif_detect(struct i2c_client *client, struct i2c_board_info *info) | |||
| 1198 | 1196 | ||
| 1199 | static int smi_type_proc_show(struct seq_file *m, void *v) | 1197 | static int smi_type_proc_show(struct seq_file *m, void *v) |
| 1200 | { | 1198 | { |
| 1201 | return seq_puts(m, "ssif\n"); | 1199 | seq_puts(m, "ssif\n"); |
| 1200 | |||
| 1201 | return seq_has_overflowed(m); | ||
| 1202 | } | 1202 | } |
| 1203 | 1203 | ||
| 1204 | static int smi_type_proc_open(struct inode *inode, struct file *file) | 1204 | static int smi_type_proc_open(struct inode *inode, struct file *file) |
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index af5d63c7cc53..2fe195002021 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c | |||
| @@ -75,29 +75,25 @@ efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, | |||
| 75 | unsigned long key; | 75 | unsigned long key; |
| 76 | u32 desc_version; | 76 | u32 desc_version; |
| 77 | 77 | ||
| 78 | *map_size = 0; | 78 | *map_size = sizeof(*m) * 32; |
| 79 | *desc_size = 0; | 79 | again: |
| 80 | key = 0; | ||
| 81 | status = efi_call_early(get_memory_map, map_size, NULL, | ||
| 82 | &key, desc_size, &desc_version); | ||
| 83 | if (status != EFI_BUFFER_TOO_SMALL) | ||
| 84 | return EFI_LOAD_ERROR; | ||
| 85 | |||
| 86 | /* | 80 | /* |
| 87 | * Add an additional efi_memory_desc_t because we're doing an | 81 | * Add an additional efi_memory_desc_t because we're doing an |
| 88 | * allocation which may be in a new descriptor region. | 82 | * allocation which may be in a new descriptor region. |
| 89 | */ | 83 | */ |
| 90 | *map_size += *desc_size; | 84 | *map_size += sizeof(*m); |
| 91 | status = efi_call_early(allocate_pool, EFI_LOADER_DATA, | 85 | status = efi_call_early(allocate_pool, EFI_LOADER_DATA, |
| 92 | *map_size, (void **)&m); | 86 | *map_size, (void **)&m); |
| 93 | if (status != EFI_SUCCESS) | 87 | if (status != EFI_SUCCESS) |
| 94 | goto fail; | 88 | goto fail; |
| 95 | 89 | ||
| 90 | *desc_size = 0; | ||
| 91 | key = 0; | ||
| 96 | status = efi_call_early(get_memory_map, map_size, m, | 92 | status = efi_call_early(get_memory_map, map_size, m, |
| 97 | &key, desc_size, &desc_version); | 93 | &key, desc_size, &desc_version); |
| 98 | if (status == EFI_BUFFER_TOO_SMALL) { | 94 | if (status == EFI_BUFFER_TOO_SMALL) { |
| 99 | efi_call_early(free_pool, m); | 95 | efi_call_early(free_pool, m); |
| 100 | return EFI_LOAD_ERROR; | 96 | goto again; |
| 101 | } | 97 | } |
| 102 | 98 | ||
| 103 | if (status != EFI_SUCCESS) | 99 | if (status != EFI_SUCCESS) |
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 638e797037da..97527614141b 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig | |||
| @@ -735,6 +735,31 @@ config INTEL_IPS | |||
| 735 | functionality. If in doubt, say Y here; it will only load on | 735 | functionality. If in doubt, say Y here; it will only load on |
| 736 | supported platforms. | 736 | supported platforms. |
| 737 | 737 | ||
| 738 | config INTEL_IMR | ||
| 739 | bool "Intel Isolated Memory Region support" | ||
| 740 | default n | ||
| 741 | depends on X86_INTEL_QUARK && IOSF_MBI | ||
| 742 | ---help--- | ||
| 743 | This option provides a means to manipulate Isolated Memory Regions. | ||
| 744 | IMRs are a set of registers that define read and write access masks | ||
| 745 | to prohibit certain system agents from accessing memory with 1 KiB | ||
| 746 | granularity. | ||
| 747 | |||
| 748 | IMRs make it possible to control read/write access to an address | ||
| 749 | by hardware agents inside the SoC. Read and write masks can be | ||
| 750 | defined for: | ||
| 751 | - eSRAM flush | ||
| 752 | - Dirty CPU snoop (write only) | ||
| 753 | - RMU access | ||
| 754 | - PCI Virtual Channel 0/Virtual Channel 1 | ||
| 755 | - SMM mode | ||
| 756 | - Non SMM mode | ||
| 757 | |||
| 758 | Quark contains a set of eight IMR registers and makes use of those | ||
| 759 | registers during its bootup process. | ||
| 760 | |||
| 761 | If you are running on a Galileo/Quark say Y here. | ||
| 762 | |||
| 738 | config IBM_RTL | 763 | config IBM_RTL |
| 739 | tristate "Device driver to enable PRTL support" | 764 | tristate "Device driver to enable PRTL support" |
| 740 | depends on X86 && PCI | 765 | depends on X86 && PCI |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 02b16910f4c9..995986b8e36b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
| @@ -645,11 +645,12 @@ out: | |||
| 645 | 645 | ||
| 646 | static unsigned long randomize_stack_top(unsigned long stack_top) | 646 | static unsigned long randomize_stack_top(unsigned long stack_top) |
| 647 | { | 647 | { |
| 648 | unsigned int random_variable = 0; | 648 | unsigned long random_variable = 0; |
| 649 | 649 | ||
| 650 | if ((current->flags & PF_RANDOMIZE) && | 650 | if ((current->flags & PF_RANDOMIZE) && |
| 651 | !(current->personality & ADDR_NO_RANDOMIZE)) { | 651 | !(current->personality & ADDR_NO_RANDOMIZE)) { |
| 652 | random_variable = get_random_int() & STACK_RND_MASK; | 652 | random_variable = (unsigned long) get_random_int(); |
| 653 | random_variable &= STACK_RND_MASK; | ||
| 653 | random_variable <<= PAGE_SHIFT; | 654 | random_variable <<= PAGE_SHIFT; |
| 654 | } | 655 | } |
| 655 | #ifdef CONFIG_STACK_GROWSUP | 656 | #ifdef CONFIG_STACK_GROWSUP |
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d1ec10a940ff..1b45e4a0519b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h | |||
| @@ -202,7 +202,7 @@ static __always_inline void data_access_exceeds_word_size(void) | |||
| 202 | { | 202 | { |
| 203 | } | 203 | } |
| 204 | 204 | ||
| 205 | static __always_inline void __read_once_size(volatile void *p, void *res, int size) | 205 | static __always_inline void __read_once_size(const volatile void *p, void *res, int size) |
| 206 | { | 206 | { |
| 207 | switch (size) { | 207 | switch (size) { |
| 208 | case 1: *(__u8 *)res = *(volatile __u8 *)p; break; | 208 | case 1: *(__u8 *)res = *(volatile __u8 *)p; break; |
| @@ -259,10 +259,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s | |||
| 259 | */ | 259 | */ |
| 260 | 260 | ||
| 261 | #define READ_ONCE(x) \ | 261 | #define READ_ONCE(x) \ |
| 262 | ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; }) | 262 | ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) |
| 263 | 263 | ||
| 264 | #define WRITE_ONCE(x, val) \ | 264 | #define WRITE_ONCE(x, val) \ |
| 265 | ({ typeof(x) __val; __val = val; __write_once_size(&x, &__val, sizeof(__val)); __val; }) | 265 | ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) |
| 266 | 266 | ||
| 267 | #endif /* __KERNEL__ */ | 267 | #endif /* __KERNEL__ */ |
| 268 | 268 | ||
diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 75ae2e2631fc..a19bcf9e762e 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h | |||
| @@ -156,8 +156,14 @@ typedef enum { | |||
| 156 | KDB_REASON_SYSTEM_NMI, /* In NMI due to SYSTEM cmd; regs valid */ | 156 | KDB_REASON_SYSTEM_NMI, /* In NMI due to SYSTEM cmd; regs valid */ |
| 157 | } kdb_reason_t; | 157 | } kdb_reason_t; |
| 158 | 158 | ||
| 159 | enum kdb_msgsrc { | ||
| 160 | KDB_MSGSRC_INTERNAL, /* direct call to kdb_printf() */ | ||
| 161 | KDB_MSGSRC_PRINTK, /* trapped from printk() */ | ||
| 162 | }; | ||
| 163 | |||
| 159 | extern int kdb_trap_printk; | 164 | extern int kdb_trap_printk; |
| 160 | extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args); | 165 | extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt, |
| 166 | va_list args); | ||
| 161 | extern __printf(1, 2) int kdb_printf(const char *, ...); | 167 | extern __printf(1, 2) int kdb_printf(const char *, ...); |
| 162 | typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); | 168 | typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); |
| 163 | 169 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 41c60e5302d7..6d77432e14ff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -363,9 +363,6 @@ extern void show_regs(struct pt_regs *); | |||
| 363 | */ | 363 | */ |
| 364 | extern void show_stack(struct task_struct *task, unsigned long *sp); | 364 | extern void show_stack(struct task_struct *task, unsigned long *sp); |
| 365 | 365 | ||
| 366 | void io_schedule(void); | ||
| 367 | long io_schedule_timeout(long timeout); | ||
| 368 | |||
| 369 | extern void cpu_init (void); | 366 | extern void cpu_init (void); |
| 370 | extern void trap_init(void); | 367 | extern void trap_init(void); |
| 371 | extern void update_process_times(int user); | 368 | extern void update_process_times(int user); |
| @@ -422,6 +419,13 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout); | |||
| 422 | asmlinkage void schedule(void); | 419 | asmlinkage void schedule(void); |
| 423 | extern void schedule_preempt_disabled(void); | 420 | extern void schedule_preempt_disabled(void); |
| 424 | 421 | ||
| 422 | extern long io_schedule_timeout(long timeout); | ||
| 423 | |||
| 424 | static inline void io_schedule(void) | ||
| 425 | { | ||
| 426 | io_schedule_timeout(MAX_SCHEDULE_TIMEOUT); | ||
| 427 | } | ||
| 428 | |||
| 425 | struct nsproxy; | 429 | struct nsproxy; |
| 426 | struct user_namespace; | 430 | struct user_namespace; |
| 427 | 431 | ||
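io_schedule() is now a header inline: the unbounded sleep is just
io_schedule_timeout() with MAX_SCHEDULE_TIMEOUT, so the iowait accounting
lives in one place (the rewritten io_schedule_timeout() later in this
series). A kernel-context sketch of the equivalence and the bounded variant:

    io_schedule();                             /* unbounded I/O sleep */
    io_schedule_timeout(MAX_SCHEDULE_TIMEOUT); /* identical, spelled out */

    /* Bounded variant, e.g. to give up after roughly one second: */
    if (io_schedule_timeout(HZ) == 0)
            pr_warn("I/O wait timed out\n");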
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 07ce18ca71e0..0874e2edd275 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c | |||
| @@ -604,7 +604,7 @@ return_normal: | |||
| 604 | online_cpus) | 604 | online_cpus) |
| 605 | cpu_relax(); | 605 | cpu_relax(); |
| 606 | if (!time_left) | 606 | if (!time_left) |
| 607 | pr_crit("KGDB: Timed out waiting for secondary CPUs.\n"); | 607 | pr_crit("Timed out waiting for secondary CPUs.\n"); |
| 608 | 608 | ||
| 609 | /* | 609 | /* |
| 610 | * At this point the primary processor is completely | 610 | * At this point the primary processor is completely |
| @@ -696,6 +696,14 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) | |||
| 696 | 696 | ||
| 697 | if (arch_kgdb_ops.enable_nmi) | 697 | if (arch_kgdb_ops.enable_nmi) |
| 698 | arch_kgdb_ops.enable_nmi(0); | 698 | arch_kgdb_ops.enable_nmi(0); |
| 699 | /* | ||
| 700 | * Avoid entering the debugger if we were triggered due to an oops, | ||
| 701 | * but panic_timeout indicates the system should automatically | ||
| 702 | * reboot on panic. We don't want to get stuck waiting for input | ||
| 703 | * on such systems, especially if it's "just" an oops. | ||
| 704 | */ | ||
| 705 | if (signo != SIGTRAP && panic_timeout) | ||
| 706 | return 1; | ||
| 699 | 707 | ||
| 700 | memset(ks, 0, sizeof(struct kgdb_state)); | 708 | memset(ks, 0, sizeof(struct kgdb_state)); |
| 701 | ks->cpu = raw_smp_processor_id(); | 709 | ks->cpu = raw_smp_processor_id(); |
| @@ -828,6 +836,15 @@ static int kgdb_panic_event(struct notifier_block *self, | |||
| 828 | unsigned long val, | 836 | unsigned long val, |
| 829 | void *data) | 837 | void *data) |
| 830 | { | 838 | { |
| 839 | /* | ||
| 840 | * Avoid entering the debugger if we were triggered due to a panic. | ||
| 841 | * We don't want to get stuck waiting for input from the user in that case. | ||
| 842 | * panic_timeout indicates the system should automatically | ||
| 843 | * reboot on panic. | ||
| 844 | */ | ||
| 845 | if (panic_timeout) | ||
| 846 | return NOTIFY_DONE; | ||
| 847 | |||
| 831 | if (dbg_kdb_mode) | 848 | if (dbg_kdb_mode) |
| 832 | kdb_printf("PANIC: %s\n", (char *)data); | 849 | kdb_printf("PANIC: %s\n", (char *)data); |
| 833 | kgdb_breakpoint(); | 850 | kgdb_breakpoint(); |
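Both new bail-outs key off the global panic_timeout, which the panic= boot
parameter sets; a non-zero value means the machine is expected to reboot
itself rather than sit in a debugger. Illustrative command-line values:

    panic=5    auto-reboot 5 seconds after a panic; kgdb/kdb now steps aside
    panic=0    the default: wait forever, so the debugger still gets control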
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 7c70812caea5..fc1ef736253c 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c | |||
| @@ -439,7 +439,7 @@ poll_again: | |||
| 439 | * substituted for %d, %x or %o in the prompt. | 439 | * substituted for %d, %x or %o in the prompt. |
| 440 | */ | 440 | */ |
| 441 | 441 | ||
| 442 | char *kdb_getstr(char *buffer, size_t bufsize, char *prompt) | 442 | char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt) |
| 443 | { | 443 | { |
| 444 | if (prompt && kdb_prompt_str != prompt) | 444 | if (prompt && kdb_prompt_str != prompt) |
| 445 | strncpy(kdb_prompt_str, prompt, CMD_BUFLEN); | 445 | strncpy(kdb_prompt_str, prompt, CMD_BUFLEN); |
| @@ -548,7 +548,7 @@ static int kdb_search_string(char *searched, char *searchfor) | |||
| 548 | return 0; | 548 | return 0; |
| 549 | } | 549 | } |
| 550 | 550 | ||
| 551 | int vkdb_printf(const char *fmt, va_list ap) | 551 | int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) |
| 552 | { | 552 | { |
| 553 | int diag; | 553 | int diag; |
| 554 | int linecount; | 554 | int linecount; |
| @@ -680,6 +680,12 @@ int vkdb_printf(const char *fmt, va_list ap) | |||
| 680 | size_avail = sizeof(kdb_buffer) - len; | 680 | size_avail = sizeof(kdb_buffer) - len; |
| 681 | goto kdb_print_out; | 681 | goto kdb_print_out; |
| 682 | } | 682 | } |
| 683 | if (kdb_grepping_flag >= KDB_GREPPING_FLAG_SEARCH) | ||
| 684 | /* | ||
| 685 | * This was an interactive search (using '/' at the more | ||
| 686 | * prompt) and it has completed. Clear the flag. | ||
| 687 | */ | ||
| 688 | kdb_grepping_flag = 0; | ||
| 683 | /* | 689 | /* |
| 684 | * at this point the string is a full line and | 690 | * at this point the string is a full line and |
| 685 | * should be printed, up to the null. | 691 | * should be printed, up to the null. |
| @@ -691,19 +697,20 @@ kdb_printit: | |||
| 691 | * Write to all consoles. | 697 | * Write to all consoles. |
| 692 | */ | 698 | */ |
| 693 | retlen = strlen(kdb_buffer); | 699 | retlen = strlen(kdb_buffer); |
| 700 | cp = (char *) printk_skip_level(kdb_buffer); | ||
| 694 | if (!dbg_kdb_mode && kgdb_connected) { | 701 | if (!dbg_kdb_mode && kgdb_connected) { |
| 695 | gdbstub_msg_write(kdb_buffer, retlen); | 702 | gdbstub_msg_write(cp, retlen - (cp - kdb_buffer)); |
| 696 | } else { | 703 | } else { |
| 697 | if (dbg_io_ops && !dbg_io_ops->is_console) { | 704 | if (dbg_io_ops && !dbg_io_ops->is_console) { |
| 698 | len = retlen; | 705 | len = retlen - (cp - kdb_buffer); |
| 699 | cp = kdb_buffer; | 706 | cp2 = cp; |
| 700 | while (len--) { | 707 | while (len--) { |
| 701 | dbg_io_ops->write_char(*cp); | 708 | dbg_io_ops->write_char(*cp2); |
| 702 | cp++; | 709 | cp2++; |
| 703 | } | 710 | } |
| 704 | } | 711 | } |
| 705 | while (c) { | 712 | while (c) { |
| 706 | c->write(c, kdb_buffer, retlen); | 713 | c->write(c, cp, retlen - (cp - kdb_buffer)); |
| 707 | touch_nmi_watchdog(); | 714 | touch_nmi_watchdog(); |
| 708 | c = c->next; | 715 | c = c->next; |
| 709 | } | 716 | } |
| @@ -711,7 +718,10 @@ kdb_printit: | |||
| 711 | if (logging) { | 718 | if (logging) { |
| 712 | saved_loglevel = console_loglevel; | 719 | saved_loglevel = console_loglevel; |
| 713 | console_loglevel = CONSOLE_LOGLEVEL_SILENT; | 720 | console_loglevel = CONSOLE_LOGLEVEL_SILENT; |
| 714 | printk(KERN_INFO "%s", kdb_buffer); | 721 | if (printk_get_level(kdb_buffer) || src == KDB_MSGSRC_PRINTK) |
| 722 | printk("%s", kdb_buffer); | ||
| 723 | else | ||
| 724 | pr_info("%s", kdb_buffer); | ||
| 715 | } | 725 | } |
| 716 | 726 | ||
| 717 | if (KDB_STATE(PAGER)) { | 727 | if (KDB_STATE(PAGER)) { |
| @@ -794,11 +804,23 @@ kdb_printit: | |||
| 794 | kdb_nextline = linecount - 1; | 804 | kdb_nextline = linecount - 1; |
| 795 | kdb_printf("\r"); | 805 | kdb_printf("\r"); |
| 796 | suspend_grep = 1; /* for this recursion */ | 806 | suspend_grep = 1; /* for this recursion */ |
| 807 | } else if (buf1[0] == '/' && !kdb_grepping_flag) { | ||
| 808 | kdb_printf("\r"); | ||
| 809 | kdb_getstr(kdb_grep_string, KDB_GREP_STRLEN, | ||
| 810 | kdbgetenv("SEARCHPROMPT") ?: "search> "); | ||
| 811 | *strchrnul(kdb_grep_string, '\n') = '\0'; | ||
| 812 | kdb_grepping_flag += KDB_GREPPING_FLAG_SEARCH; | ||
| 813 | suspend_grep = 1; /* for this recursion */ | ||
| 797 | } else if (buf1[0] && buf1[0] != '\n') { | 814 | } else if (buf1[0] && buf1[0] != '\n') { |
| 798 | /* user hit something other than enter */ | 815 | /* user hit something other than enter */ |
| 799 | suspend_grep = 1; /* for this recursion */ | 816 | suspend_grep = 1; /* for this recursion */ |
| 800 | kdb_printf("\nOnly 'q' or 'Q' are processed at more " | 817 | if (buf1[0] != '/') |
| 801 | "prompt, input ignored\n"); | 818 | kdb_printf( |
| 819 | "\nOnly 'q', 'Q' or '/' are processed at " | ||
| 820 | "more prompt, input ignored\n"); | ||
| 821 | else | ||
| 822 | kdb_printf("\n'/' cannot be used during | " | ||
| 823 | "grep filtering, input ignored\n"); | ||
| 802 | } else if (kdb_grepping_flag) { | 824 | } else if (kdb_grepping_flag) { |
| 803 | /* user hit enter */ | 825 | /* user hit enter */ |
| 804 | suspend_grep = 1; /* for this recursion */ | 826 | suspend_grep = 1; /* for this recursion */ |
| @@ -844,7 +866,7 @@ int kdb_printf(const char *fmt, ...) | |||
| 844 | int r; | 866 | int r; |
| 845 | 867 | ||
| 846 | va_start(ap, fmt); | 868 | va_start(ap, fmt); |
| 847 | r = vkdb_printf(fmt, ap); | 869 | r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap); |
| 848 | va_end(ap); | 870 | va_end(ap); |
| 849 | 871 | ||
| 850 | return r; | 872 | return r; |
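The cp adjustment above relies on printk_skip_level() returning a pointer
just past the two-byte log-level header (an ASCII SOH byte followed by a
level digit) that printk() records carry, so trapped messages reach the gdb
stub and kdb consoles without raw control bytes. A reduced userspace model
of what gets stripped:

    #include <stdio.h>

    /* Reduced model of printk_skip_level(): printk() records start with
     * "\001<level>"; skip that header when echoing to a dumb console. */
    static const char *skip_level(const char *s)
    {
            return (s[0] == '\001' && s[1]) ? s + 2 : s;
    }

    int main(void)
    {
            const char *msg = "\0016hello from printk\n"; /* KERN_INFO "6" */
            printf("%s", skip_level(msg));
            return 0;
    }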
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 7b40c5f07dce..4121345498e0 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c | |||
| @@ -50,8 +50,7 @@ | |||
| 50 | static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE; | 50 | static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE; |
| 51 | module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); | 51 | module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); |
| 52 | 52 | ||
| 53 | #define GREP_LEN 256 | 53 | char kdb_grep_string[KDB_GREP_STRLEN]; |
| 54 | char kdb_grep_string[GREP_LEN]; | ||
| 55 | int kdb_grepping_flag; | 54 | int kdb_grepping_flag; |
| 56 | EXPORT_SYMBOL(kdb_grepping_flag); | 55 | EXPORT_SYMBOL(kdb_grepping_flag); |
| 57 | int kdb_grep_leading; | 56 | int kdb_grep_leading; |
| @@ -870,7 +869,7 @@ static void parse_grep(const char *str) | |||
| 870 | len = strlen(cp); | 869 | len = strlen(cp); |
| 871 | if (!len) | 870 | if (!len) |
| 872 | return; | 871 | return; |
| 873 | if (len >= GREP_LEN) { | 872 | if (len >= KDB_GREP_STRLEN) { |
| 874 | kdb_printf("search string too long\n"); | 873 | kdb_printf("search string too long\n"); |
| 875 | return; | 874 | return; |
| 876 | } | 875 | } |
| @@ -915,13 +914,12 @@ int kdb_parse(const char *cmdstr) | |||
| 915 | char *cp; | 914 | char *cp; |
| 916 | char *cpp, quoted; | 915 | char *cpp, quoted; |
| 917 | kdbtab_t *tp; | 916 | kdbtab_t *tp; |
| 918 | int i, escaped, ignore_errors = 0, check_grep; | 917 | int i, escaped, ignore_errors = 0, check_grep = 0; |
| 919 | 918 | ||
| 920 | /* | 919 | /* |
| 921 | * First tokenize the command string. | 920 | * First tokenize the command string. |
| 922 | */ | 921 | */ |
| 923 | cp = (char *)cmdstr; | 922 | cp = (char *)cmdstr; |
| 924 | kdb_grepping_flag = check_grep = 0; | ||
| 925 | 923 | ||
| 926 | if (KDB_FLAG(CMD_INTERRUPT)) { | 924 | if (KDB_FLAG(CMD_INTERRUPT)) { |
| 927 | /* Previous command was interrupted, newline must not | 925 | /* Previous command was interrupted, newline must not |
| @@ -1247,7 +1245,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, | |||
| 1247 | kdb_printf("due to NonMaskable Interrupt @ " | 1245 | kdb_printf("due to NonMaskable Interrupt @ " |
| 1248 | kdb_machreg_fmt "\n", | 1246 | kdb_machreg_fmt "\n", |
| 1249 | instruction_pointer(regs)); | 1247 | instruction_pointer(regs)); |
| 1250 | kdb_dumpregs(regs); | ||
| 1251 | break; | 1248 | break; |
| 1252 | case KDB_REASON_SSTEP: | 1249 | case KDB_REASON_SSTEP: |
| 1253 | case KDB_REASON_BREAK: | 1250 | case KDB_REASON_BREAK: |
| @@ -1281,6 +1278,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, | |||
| 1281 | */ | 1278 | */ |
| 1282 | kdb_nextline = 1; | 1279 | kdb_nextline = 1; |
| 1283 | KDB_STATE_CLEAR(SUPPRESS); | 1280 | KDB_STATE_CLEAR(SUPPRESS); |
| 1281 | kdb_grepping_flag = 0; | ||
| 1282 | /* ensure the old search does not leak into '/' commands */ | ||
| 1283 | kdb_grep_string[0] = '\0'; | ||
| 1284 | 1284 | ||
| 1285 | cmdbuf = cmd_cur; | 1285 | cmdbuf = cmd_cur; |
| 1286 | *cmdbuf = '\0'; | 1286 | *cmdbuf = '\0'; |
| @@ -2256,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv) | |||
| 2256 | /* | 2256 | /* |
| 2257 | * Validate cpunum | 2257 | * Validate cpunum |
| 2258 | */ | 2258 | */ |
| 2259 | if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb) | 2259 | if ((cpunum >= CONFIG_NR_CPUS) || !kgdb_info[cpunum].enter_kgdb) |
| 2260 | return KDB_BADCPUNUM; | 2260 | return KDB_BADCPUNUM; |
| 2261 | 2261 | ||
| 2262 | dbg_switch_cpu = cpunum; | 2262 | dbg_switch_cpu = cpunum; |
| @@ -2583,7 +2583,7 @@ static int kdb_summary(int argc, const char **argv) | |||
| 2583 | #define K(x) ((x) << (PAGE_SHIFT - 10)) | 2583 | #define K(x) ((x) << (PAGE_SHIFT - 10)) |
| 2584 | kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n" | 2584 | kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n" |
| 2585 | "Buffers: %8lu kB\n", | 2585 | "Buffers: %8lu kB\n", |
| 2586 | val.totalram, val.freeram, val.bufferram); | 2586 | K(val.totalram), K(val.freeram), K(val.bufferram)); |
| 2587 | return 0; | 2587 | return 0; |
| 2588 | } | 2588 | } |
| 2589 | 2589 | ||
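The K() fix matters because struct sysinfo reports memory in pages, not
kilobytes; without the conversion the summary under-reported memory by a
factor of PAGE_SIZE/1024. A quick userspace check of the macro, assuming
4 KiB pages:

    #include <stdio.h>

    #define PAGE_SHIFT 12                   /* 4 KiB pages, as on x86 */
    #define K(x) ((x) << (PAGE_SHIFT - 10)) /* pages -> kB */

    int main(void)
    {
            unsigned long totalram = 2097152; /* hypothetical 8 GiB machine */
            printf("MemTotal: %8lu kB\n", K(totalram)); /* 8388608 kB */
            return 0;
    }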
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index eaacd1693954..75014d7f4568 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h | |||
| @@ -196,7 +196,9 @@ extern int kdb_main_loop(kdb_reason_t, kdb_reason_t, | |||
| 196 | 196 | ||
| 197 | /* Miscellaneous functions and data areas */ | 197 | /* Miscellaneous functions and data areas */ |
| 198 | extern int kdb_grepping_flag; | 198 | extern int kdb_grepping_flag; |
| 199 | #define KDB_GREPPING_FLAG_SEARCH 0x8000 | ||
| 199 | extern char kdb_grep_string[]; | 200 | extern char kdb_grep_string[]; |
| 201 | #define KDB_GREP_STRLEN 256 | ||
| 200 | extern int kdb_grep_leading; | 202 | extern int kdb_grep_leading; |
| 201 | extern int kdb_grep_trailing; | 203 | extern int kdb_grep_trailing; |
| 202 | extern char *kdb_cmds[]; | 204 | extern char *kdb_cmds[]; |
| @@ -209,7 +211,7 @@ extern void kdb_ps1(const struct task_struct *p); | |||
| 209 | extern void kdb_print_nameval(const char *name, unsigned long val); | 211 | extern void kdb_print_nameval(const char *name, unsigned long val); |
| 210 | extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); | 212 | extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); |
| 211 | extern void kdb_meminfo_proc_show(void); | 213 | extern void kdb_meminfo_proc_show(void); |
| 212 | extern char *kdb_getstr(char *, size_t, char *); | 214 | extern char *kdb_getstr(char *, size_t, const char *); |
| 213 | extern void kdb_gdb_state_pass(char *buf); | 215 | extern void kdb_gdb_state_pass(char *buf); |
| 214 | 216 | ||
| 215 | /* Defines for kdb_symbol_print */ | 217 | /* Defines for kdb_symbol_print */ |
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 3059bc2f022d..e16e5542bf13 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c | |||
| @@ -1193,7 +1193,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
| 1193 | ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); | 1193 | ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); |
| 1194 | 1194 | ||
| 1195 | if (unlikely(ret)) { | 1195 | if (unlikely(ret)) { |
| 1196 | remove_waiter(lock, &waiter); | 1196 | if (rt_mutex_has_waiters(lock)) |
| 1197 | remove_waiter(lock, &waiter); | ||
| 1197 | rt_mutex_handle_deadlock(ret, chwalk, &waiter); | 1198 | rt_mutex_handle_deadlock(ret, chwalk, &waiter); |
| 1198 | } | 1199 | } |
| 1199 | 1200 | ||
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c06df7de0963..01cfd69c54c6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
| @@ -1811,7 +1811,7 @@ int vprintk_default(const char *fmt, va_list args) | |||
| 1811 | 1811 | ||
| 1812 | #ifdef CONFIG_KGDB_KDB | 1812 | #ifdef CONFIG_KGDB_KDB |
| 1813 | if (unlikely(kdb_trap_printk)) { | 1813 | if (unlikely(kdb_trap_printk)) { |
| 1814 | r = vkdb_printf(fmt, args); | 1814 | r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); |
| 1815 | return r; | 1815 | return r; |
| 1816 | } | 1816 | } |
| 1817 | #endif | 1817 | #endif |
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0d7bbe3095ad..0a571e9a0f1d 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h | |||
| @@ -326,6 +326,7 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
| 326 | special = t->rcu_read_unlock_special; | 326 | special = t->rcu_read_unlock_special; |
| 327 | if (special.b.need_qs) { | 327 | if (special.b.need_qs) { |
| 328 | rcu_preempt_qs(); | 328 | rcu_preempt_qs(); |
| 329 | t->rcu_read_unlock_special.b.need_qs = false; | ||
| 329 | if (!t->rcu_read_unlock_special.s) { | 330 | if (!t->rcu_read_unlock_special.s) { |
| 330 | local_irq_restore(flags); | 331 | local_irq_restore(flags); |
| 331 | return; | 332 | return; |
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index 8a2e230fb86a..eae160dd669d 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c | |||
| @@ -87,8 +87,7 @@ static inline struct autogroup *autogroup_create(void) | |||
| 87 | * so we don't have to move tasks around upon policy change, | 87 | * so we don't have to move tasks around upon policy change, |
| 88 | * or flail around trying to allocate bandwidth on the fly. | 88 | * or flail around trying to allocate bandwidth on the fly. |
| 89 | * A bandwidth exception in __sched_setscheduler() allows | 89 | * A bandwidth exception in __sched_setscheduler() allows |
| 90 | * the policy change to proceed. Thereafter, task_group() | 90 | * the policy change to proceed. |
| 91 | * returns &root_task_group, so zero bandwidth is required. | ||
| 92 | */ | 91 | */ |
| 93 | free_rt_sched_group(tg); | 92 | free_rt_sched_group(tg); |
| 94 | tg->rt_se = root_task_group.rt_se; | 93 | tg->rt_se = root_task_group.rt_se; |
| @@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) | |||
| 115 | if (tg != &root_task_group) | 114 | if (tg != &root_task_group) |
| 116 | return false; | 115 | return false; |
| 117 | 116 | ||
| 118 | if (p->sched_class != &fair_sched_class) | ||
| 119 | return false; | ||
| 120 | |||
| 121 | /* | 117 | /* |
| 122 | * We can only assume the task group can't go away on us if | 118 | * We can only assume the task group can't go away on us if |
| 123 | * autogroup_move_group() can see us on ->thread_group list. | 119 | * autogroup_move_group() can see us on ->thread_group list. |
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 7052d3fd4e7b..8d0f35debf35 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c | |||
| @@ -274,7 +274,7 @@ bool try_wait_for_completion(struct completion *x) | |||
| 274 | * first without taking the lock so we can | 274 | * first without taking the lock so we can |
| 275 | * return early in the blocking case. | 275 | * return early in the blocking case. |
| 276 | */ | 276 | */ |
| 277 | if (!ACCESS_ONCE(x->done)) | 277 | if (!READ_ONCE(x->done)) |
| 278 | return 0; | 278 | return 0; |
| 279 | 279 | ||
| 280 | spin_lock_irqsave(&x->wait.lock, flags); | 280 | spin_lock_irqsave(&x->wait.lock, flags); |
| @@ -297,6 +297,21 @@ EXPORT_SYMBOL(try_wait_for_completion); | |||
| 297 | */ | 297 | */ |
| 298 | bool completion_done(struct completion *x) | 298 | bool completion_done(struct completion *x) |
| 299 | { | 299 | { |
| 300 | return !!ACCESS_ONCE(x->done); | 300 | if (!READ_ONCE(x->done)) |
| 301 | return false; | ||
| 302 | |||
| 303 | /* | ||
| 304 | * If ->done, we need to wait for complete() to release ->wait.lock | ||
| 305 | * otherwise we can end up freeing the completion before complete() | ||
| 306 | * is done referencing it. | ||
| 307 | * | ||
| 308 | * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders | ||
| 309 | * the loads of ->done and ->wait.lock such that we cannot observe | ||
| 310 | * the lock before complete() acquires it while observing the ->done | ||
| 311 | * after it's acquired the lock. | ||
| 312 | */ | ||
| 313 | smp_rmb(); | ||
| 314 | spin_unlock_wait(&x->wait.lock); | ||
| 315 | return true; | ||
| 301 | } | 316 | } |
| 302 | EXPORT_SYMBOL(completion_done); | 317 | EXPORT_SYMBOL(completion_done); |
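The smp_rmb() plus spin_unlock_wait() combination protects callers that free
a completion as soon as it reads as done. A kernel-context sketch of the
pattern being closed (queue_work_that_completes() is a hypothetical helper
standing in for any asynchronous caller of complete()):

    struct completion *x = kmalloc(sizeof(*x), GFP_KERNEL);

    init_completion(x);
    queue_work_that_completes(x);   /* hypothetical: calls complete(x) */

    /* Previously this loop could free @x while complete() on another CPU
     * was still inside x->wait.lock (a use-after-free); completion_done()
     * now waits out the lock before returning true. */
    while (!completion_done(x))
            cpu_relax();
    kfree(x);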
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 13049aac05a6..f0f831e8a345 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -307,66 +307,6 @@ __read_mostly int scheduler_running; | |||
| 307 | int sysctl_sched_rt_runtime = 950000; | 307 | int sysctl_sched_rt_runtime = 950000; |
| 308 | 308 | ||
| 309 | /* | 309 | /* |
| 310 | * __task_rq_lock - lock the rq @p resides on. | ||
| 311 | */ | ||
| 312 | static inline struct rq *__task_rq_lock(struct task_struct *p) | ||
| 313 | __acquires(rq->lock) | ||
| 314 | { | ||
| 315 | struct rq *rq; | ||
| 316 | |||
| 317 | lockdep_assert_held(&p->pi_lock); | ||
| 318 | |||
| 319 | for (;;) { | ||
| 320 | rq = task_rq(p); | ||
| 321 | raw_spin_lock(&rq->lock); | ||
| 322 | if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) | ||
| 323 | return rq; | ||
| 324 | raw_spin_unlock(&rq->lock); | ||
| 325 | |||
| 326 | while (unlikely(task_on_rq_migrating(p))) | ||
| 327 | cpu_relax(); | ||
| 328 | } | ||
| 329 | } | ||
| 330 | |||
| 331 | /* | ||
| 332 | * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. | ||
| 333 | */ | ||
| 334 | static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | ||
| 335 | __acquires(p->pi_lock) | ||
| 336 | __acquires(rq->lock) | ||
| 337 | { | ||
| 338 | struct rq *rq; | ||
| 339 | |||
| 340 | for (;;) { | ||
| 341 | raw_spin_lock_irqsave(&p->pi_lock, *flags); | ||
| 342 | rq = task_rq(p); | ||
| 343 | raw_spin_lock(&rq->lock); | ||
| 344 | if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) | ||
| 345 | return rq; | ||
| 346 | raw_spin_unlock(&rq->lock); | ||
| 347 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
| 348 | |||
| 349 | while (unlikely(task_on_rq_migrating(p))) | ||
| 350 | cpu_relax(); | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | static void __task_rq_unlock(struct rq *rq) | ||
| 355 | __releases(rq->lock) | ||
| 356 | { | ||
| 357 | raw_spin_unlock(&rq->lock); | ||
| 358 | } | ||
| 359 | |||
| 360 | static inline void | ||
| 361 | task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) | ||
| 362 | __releases(rq->lock) | ||
| 363 | __releases(p->pi_lock) | ||
| 364 | { | ||
| 365 | raw_spin_unlock(&rq->lock); | ||
| 366 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
| 367 | } | ||
| 368 | |||
| 369 | /* | ||
| 370 | * this_rq_lock - lock this runqueue and disable interrupts. | 310 | * this_rq_lock - lock this runqueue and disable interrupts. |
| 371 | */ | 311 | */ |
| 372 | static struct rq *this_rq_lock(void) | 312 | static struct rq *this_rq_lock(void) |
| @@ -2899,7 +2839,7 @@ void __sched schedule_preempt_disabled(void) | |||
| 2899 | preempt_disable(); | 2839 | preempt_disable(); |
| 2900 | } | 2840 | } |
| 2901 | 2841 | ||
| 2902 | static void preempt_schedule_common(void) | 2842 | static void __sched notrace preempt_schedule_common(void) |
| 2903 | { | 2843 | { |
| 2904 | do { | 2844 | do { |
| 2905 | __preempt_count_add(PREEMPT_ACTIVE); | 2845 | __preempt_count_add(PREEMPT_ACTIVE); |
| @@ -4418,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to); | |||
| 4418 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so | 4358 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so |
| 4419 | * that process accounting knows that this is a task in IO wait state. | 4359 | * that process accounting knows that this is a task in IO wait state. |
| 4420 | */ | 4360 | */ |
| 4421 | void __sched io_schedule(void) | ||
| 4422 | { | ||
| 4423 | struct rq *rq = raw_rq(); | ||
| 4424 | |||
| 4425 | delayacct_blkio_start(); | ||
| 4426 | atomic_inc(&rq->nr_iowait); | ||
| 4427 | blk_flush_plug(current); | ||
| 4428 | current->in_iowait = 1; | ||
| 4429 | schedule(); | ||
| 4430 | current->in_iowait = 0; | ||
| 4431 | atomic_dec(&rq->nr_iowait); | ||
| 4432 | delayacct_blkio_end(); | ||
| 4433 | } | ||
| 4434 | EXPORT_SYMBOL(io_schedule); | ||
| 4435 | |||
| 4436 | long __sched io_schedule_timeout(long timeout) | 4361 | long __sched io_schedule_timeout(long timeout) |
| 4437 | { | 4362 | { |
| 4438 | struct rq *rq = raw_rq(); | 4363 | int old_iowait = current->in_iowait; |
| 4364 | struct rq *rq; | ||
| 4439 | long ret; | 4365 | long ret; |
| 4440 | 4366 | ||
| 4367 | current->in_iowait = 1; | ||
| 4368 | if (old_iowait) | ||
| 4369 | blk_schedule_flush_plug(current); | ||
| 4370 | else | ||
| 4371 | blk_flush_plug(current); | ||
| 4372 | |||
| 4441 | delayacct_blkio_start(); | 4373 | delayacct_blkio_start(); |
| 4374 | rq = raw_rq(); | ||
| 4442 | atomic_inc(&rq->nr_iowait); | 4375 | atomic_inc(&rq->nr_iowait); |
| 4443 | blk_flush_plug(current); | ||
| 4444 | current->in_iowait = 1; | ||
| 4445 | ret = schedule_timeout(timeout); | 4376 | ret = schedule_timeout(timeout); |
| 4446 | current->in_iowait = 0; | 4377 | current->in_iowait = old_iowait; |
| 4447 | atomic_dec(&rq->nr_iowait); | 4378 | atomic_dec(&rq->nr_iowait); |
| 4448 | delayacct_blkio_end(); | 4379 | delayacct_blkio_end(); |
| 4380 | |||
| 4449 | return ret; | 4381 | return ret; |
| 4450 | } | 4382 | } |
| 4383 | EXPORT_SYMBOL(io_schedule_timeout); | ||
| 4451 | 4384 | ||
| 4452 | /** | 4385 | /** |
| 4453 | * sys_sched_get_priority_max - return maximum RT priority. | 4386 | * sys_sched_get_priority_max - return maximum RT priority. |
| @@ -7642,6 +7575,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg) | |||
| 7642 | { | 7575 | { |
| 7643 | struct task_struct *g, *p; | 7576 | struct task_struct *g, *p; |
| 7644 | 7577 | ||
| 7578 | /* | ||
| 7579 | * Autogroups do not have RT tasks; see autogroup_create(). | ||
| 7580 | */ | ||
| 7581 | if (task_group_is_autogroup(tg)) | ||
| 7582 | return 0; | ||
| 7583 | |||
| 7645 | for_each_process_thread(g, p) { | 7584 | for_each_process_thread(g, p) { |
| 7646 | if (rt_task(p) && task_group(p) == tg) | 7585 | if (rt_task(p) && task_group(p) == tg) |
| 7647 | return 1; | 7586 | return 1; |
| @@ -7734,6 +7673,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg, | |||
| 7734 | { | 7673 | { |
| 7735 | int i, err = 0; | 7674 | int i, err = 0; |
| 7736 | 7675 | ||
| 7676 | /* | ||
| 7677 | * Disallowing the root group RT runtime is BAD; it would disallow the | ||
| 7678 | * kernel creating (and/or operating) RT threads. | ||
| 7679 | */ | ||
| 7680 | if (tg == &root_task_group && rt_runtime == 0) | ||
| 7681 | return -EINVAL; | ||
| 7682 | |||
| 7683 | /* A zero period doesn't make any sense. */ | ||
| 7684 | if (rt_period == 0) | ||
| 7685 | return -EINVAL; | ||
| 7686 | |||
| 7737 | mutex_lock(&rt_constraints_mutex); | 7687 | mutex_lock(&rt_constraints_mutex); |
| 7738 | read_lock(&tasklist_lock); | 7688 | read_lock(&tasklist_lock); |
| 7739 | err = __rt_schedulable(tg, rt_period, rt_runtime); | 7689 | err = __rt_schedulable(tg, rt_period, rt_runtime); |
| @@ -7790,9 +7740,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) | |||
| 7790 | rt_period = (u64)rt_period_us * NSEC_PER_USEC; | 7740 | rt_period = (u64)rt_period_us * NSEC_PER_USEC; |
| 7791 | rt_runtime = tg->rt_bandwidth.rt_runtime; | 7741 | rt_runtime = tg->rt_bandwidth.rt_runtime; |
| 7792 | 7742 | ||
| 7793 | if (rt_period == 0) | ||
| 7794 | return -EINVAL; | ||
| 7795 | |||
| 7796 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); | 7743 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); |
| 7797 | } | 7744 | } |
| 7798 | 7745 | ||
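Two behavioural points in the io_schedule_timeout() rewrite above: the
iowait flag is saved and restored rather than forced to zero, so a nested
call no longer clears the caller's accounting, and a task already in iowait
flushes its block plug asynchronously (blk_schedule_flush_plug) to avoid
recursing into itself. A kernel-context sketch of the nesting case:

    current->in_iowait = 1;  /* outer code already in an I/O section */
    io_schedule_timeout(HZ);
    /* old code: current->in_iowait is now 0, outer accounting broken;
     * new code: restored from old_iowait, so it is still 1 here. */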
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index a027799ae130..3fa8fa6d9403 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
| @@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) | |||
| 511 | struct sched_dl_entity, | 511 | struct sched_dl_entity, |
| 512 | dl_timer); | 512 | dl_timer); |
| 513 | struct task_struct *p = dl_task_of(dl_se); | 513 | struct task_struct *p = dl_task_of(dl_se); |
| 514 | unsigned long flags; | ||
| 514 | struct rq *rq; | 515 | struct rq *rq; |
| 515 | again: | ||
| 516 | rq = task_rq(p); | ||
| 517 | raw_spin_lock(&rq->lock); | ||
| 518 | 516 | ||
| 519 | if (rq != task_rq(p)) { | 517 | rq = task_rq_lock(current, &flags); |
| 520 | /* Task was moved, retrying. */ | ||
| 521 | raw_spin_unlock(&rq->lock); | ||
| 522 | goto again; | ||
| 523 | } | ||
| 524 | 518 | ||
| 525 | /* | 519 | /* |
| 526 | * We need to take care of several possible races here: | 520 | * We need to take care of several possible races here: |
| @@ -541,6 +535,26 @@ again: | |||
| 541 | 535 | ||
| 542 | sched_clock_tick(); | 536 | sched_clock_tick(); |
| 543 | update_rq_clock(rq); | 537 | update_rq_clock(rq); |
| 538 | |||
| 539 | /* | ||
| 540 | * If the throttle happened during sched-out; like: | ||
| 541 | * | ||
| 542 | * schedule() | ||
| 543 | * deactivate_task() | ||
| 544 | * dequeue_task_dl() | ||
| 545 | * update_curr_dl() | ||
| 546 | * start_dl_timer() | ||
| 547 | * __dequeue_task_dl() | ||
| 548 | * prev->on_rq = 0; | ||
| 549 | * | ||
| 550 | * We can be both throttled and !queued. Replenish the counter | ||
| 551 | * but do not enqueue -- wait for our wakeup to do that. | ||
| 552 | */ | ||
| 553 | if (!task_on_rq_queued(p)) { | ||
| 554 | replenish_dl_entity(dl_se, dl_se); | ||
| 555 | goto unlock; | ||
| 556 | } | ||
| 557 | |||
| 544 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); | 558 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); |
| 545 | if (dl_task(rq->curr)) | 559 | if (dl_task(rq->curr)) |
| 546 | check_preempt_curr_dl(rq, p, 0); | 560 | check_preempt_curr_dl(rq, p, 0); |
| @@ -555,7 +569,7 @@ again: | |||
| 555 | push_dl_task(rq); | 569 | push_dl_task(rq); |
| 556 | #endif | 570 | #endif |
| 557 | unlock: | 571 | unlock: |
| 558 | raw_spin_unlock(&rq->lock); | 572 | task_rq_unlock(rq, current, &flags); |
| 559 | 573 | ||
| 560 | return HRTIMER_NORESTART; | 574 | return HRTIMER_NORESTART; |
| 561 | } | 575 | } |
| @@ -898,6 +912,7 @@ static void yield_task_dl(struct rq *rq) | |||
| 898 | rq->curr->dl.dl_yielded = 1; | 912 | rq->curr->dl.dl_yielded = 1; |
| 899 | p->dl.runtime = 0; | 913 | p->dl.runtime = 0; |
| 900 | } | 914 | } |
| 915 | update_rq_clock(rq); | ||
| 901 | update_curr_dl(rq); | 916 | update_curr_dl(rq); |
| 902 | } | 917 | } |
| 903 | 918 | ||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0870db23d79c..dc0f435a2779 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
| @@ -1380,6 +1380,82 @@ static inline void sched_avg_update(struct rq *rq) { } | |||
| 1380 | 1380 | ||
| 1381 | extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period); | 1381 | extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period); |
| 1382 | 1382 | ||
| 1383 | /* | ||
| 1384 | * __task_rq_lock - lock the rq @p resides on. | ||
| 1385 | */ | ||
| 1386 | static inline struct rq *__task_rq_lock(struct task_struct *p) | ||
| 1387 | __acquires(rq->lock) | ||
| 1388 | { | ||
| 1389 | struct rq *rq; | ||
| 1390 | |||
| 1391 | lockdep_assert_held(&p->pi_lock); | ||
| 1392 | |||
| 1393 | for (;;) { | ||
| 1394 | rq = task_rq(p); | ||
| 1395 | raw_spin_lock(&rq->lock); | ||
| 1396 | if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) | ||
| 1397 | return rq; | ||
| 1398 | raw_spin_unlock(&rq->lock); | ||
| 1399 | |||
| 1400 | while (unlikely(task_on_rq_migrating(p))) | ||
| 1401 | cpu_relax(); | ||
| 1402 | } | ||
| 1403 | } | ||
| 1404 | |||
| 1405 | /* | ||
| 1406 | * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. | ||
| 1407 | */ | ||
| 1408 | static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | ||
| 1409 | __acquires(p->pi_lock) | ||
| 1410 | __acquires(rq->lock) | ||
| 1411 | { | ||
| 1412 | struct rq *rq; | ||
| 1413 | |||
| 1414 | for (;;) { | ||
| 1415 | raw_spin_lock_irqsave(&p->pi_lock, *flags); | ||
| 1416 | rq = task_rq(p); | ||
| 1417 | raw_spin_lock(&rq->lock); | ||
| 1418 | /* | ||
| 1419 | * move_queued_task() task_rq_lock() | ||
| 1420 | * | ||
| 1421 | * ACQUIRE (rq->lock) | ||
| 1422 | * [S] ->on_rq = MIGRATING [L] rq = task_rq() | ||
| 1423 | * WMB (__set_task_cpu()) ACQUIRE (rq->lock); | ||
| 1424 | * [S] ->cpu = new_cpu [L] task_rq() | ||
| 1425 | * [L] ->on_rq | ||
| 1426 | * RELEASE (rq->lock) | ||
| 1427 | * | ||
| 1428 | * If we observe the old cpu in task_rq_lock, the acquire of | ||
| 1429 | * the old rq->lock will fully serialize against the stores. | ||
| 1430 | * | ||
| 1431 | * If we observe the new cpu in task_rq_lock, the acquire will | ||
| 1432 | * pair with the WMB to ensure we must then also see migrating. | ||
| 1433 | */ | ||
| 1434 | if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) | ||
| 1435 | return rq; | ||
| 1436 | raw_spin_unlock(&rq->lock); | ||
| 1437 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
| 1438 | |||
| 1439 | while (unlikely(task_on_rq_migrating(p))) | ||
| 1440 | cpu_relax(); | ||
| 1441 | } | ||
| 1442 | } | ||
| 1443 | |||
| 1444 | static inline void __task_rq_unlock(struct rq *rq) | ||
| 1445 | __releases(rq->lock) | ||
| 1446 | { | ||
| 1447 | raw_spin_unlock(&rq->lock); | ||
| 1448 | } | ||
| 1449 | |||
| 1450 | static inline void | ||
| 1451 | task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) | ||
| 1452 | __releases(rq->lock) | ||
| 1453 | __releases(p->pi_lock) | ||
| 1454 | { | ||
| 1455 | raw_spin_unlock(&rq->lock); | ||
| 1456 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
| 1457 | } | ||
| 1458 | |||
| 1383 | #ifdef CONFIG_SMP | 1459 | #ifdef CONFIG_SMP |
| 1384 | #ifdef CONFIG_PREEMPT | 1460 | #ifdef CONFIG_PREEMPT |
| 1385 | 1461 | ||
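With __task_rq_lock()/task_rq_lock() moved from core.c into sched.h, any
scheduler-internal file can take a stable pi_lock/rq->lock pair; the
dl_task_timer() change earlier in this series uses exactly this. A
kernel-context sketch of the canonical pattern:

    unsigned long flags;
    struct rq *rq;

    rq = task_rq_lock(p, &flags);  /* p can no longer migrate away */
    update_rq_clock(rq);
    /* ... operate on p and its runqueue ... */
    task_rq_unlock(rq, p, &flags);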
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 4b585e0fdd22..0f60b08a4f07 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -633,10 +633,14 @@ int ntp_validate_timex(struct timex *txc) | |||
| 633 | if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) | 633 | if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) |
| 634 | return -EPERM; | 634 | return -EPERM; |
| 635 | 635 | ||
| 636 | if (txc->modes & ADJ_FREQUENCY) { | 636 | /* |
| 637 | if (LONG_MIN / PPM_SCALE > txc->freq) | 637 | * Check for potential multiplication overflows that can |
| 638 | * only happen on 64-bit systems: | ||
| 639 | */ | ||
| 640 | if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { | ||
| 641 | if (LLONG_MIN / PPM_SCALE > txc->freq) | ||
| 638 | return -EINVAL; | 642 | return -EINVAL; |
| 639 | if (LONG_MAX / PPM_SCALE < txc->freq) | 643 | if (LLONG_MAX / PPM_SCALE < txc->freq) |
| 640 | return -EINVAL; | 644 | return -EINVAL; |
| 641 | } | 645 | } |
| 642 | 646 | ||
