-rw-r--r--  Documentation/DocBook/kgdb.tmpl                |    6
-rw-r--r--  Documentation/x86/zero-page.txt                |    2
-rw-r--r--  arch/s390/include/asm/topology.h               |   24
-rw-r--r--  arch/s390/kernel/cache.c                       |   25
-rw-r--r--  arch/s390/kernel/early.c                       |   12
-rw-r--r--  arch/s390/kernel/setup.c                       |    1
-rw-r--r--  arch/s390/kernel/smp.c                         |   54
-rw-r--r--  arch/s390/kernel/topology.c                    |  134
-rw-r--r--  arch/s390/kernel/vdso64/clock_gettime.S        |    6
-rw-r--r--  arch/s390/mm/mmap.c                            |    5
-rw-r--r--  arch/x86/Kconfig                               |   16
-rw-r--r--  arch/x86/Kconfig.debug                         |   13
-rw-r--r--  arch/x86/boot/compressed/Makefile              |    1
-rw-r--r--  arch/x86/boot/compressed/aslr.c                |   34
-rw-r--r--  arch/x86/boot/compressed/efi_stub_64.S         |   25
-rw-r--r--  arch/x86/boot/compressed/efi_thunk_64.S        |  196
-rw-r--r--  arch/x86/boot/compressed/misc.c                |    3
-rw-r--r--  arch/x86/boot/compressed/misc.h                |    6
-rw-r--r--  arch/x86/include/asm/apic.h                    |    8
-rw-r--r--  arch/x86/include/asm/imr.h                     |   60
-rw-r--r--  arch/x86/include/asm/page_types.h              |    2
-rw-r--r--  arch/x86/include/asm/spinlock.h                |   90
-rw-r--r--  arch/x86/include/uapi/asm/bootparam.h          |    1
-rw-r--r--  arch/x86/kernel/acpi/boot.c                    |    5
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c          |    5
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel_early.c    |    6
-rw-r--r--  arch/x86/kernel/irq.c                          |    3
-rw-r--r--  arch/x86/kernel/kprobes/core.c                 |    2
-rw-r--r--  arch/x86/kernel/kvm.c                          |   13
-rw-r--r--  arch/x86/kernel/module.c                       |   10
-rw-r--r--  arch/x86/kernel/setup.c                        |   22
-rw-r--r--  arch/x86/kernel/uprobes.c                      |  153
-rw-r--r--  arch/x86/mm/init.c                             |   28
-rw-r--r--  arch/x86/mm/mmap.c                             |    6
-rw-r--r--  arch/x86/platform/Makefile                     |    1
-rw-r--r--  arch/x86/platform/efi/efi_stub_64.S            |  161
-rw-r--r--  arch/x86/platform/efi/efi_thunk_64.S           |  121
-rw-r--r--  arch/x86/platform/intel-quark/Makefile         |    2
-rw-r--r--  arch/x86/platform/intel-quark/imr.c            |  661
-rw-r--r--  arch/x86/platform/intel-quark/imr_selftest.c   |  129
-rw-r--r--  arch/x86/xen/spinlock.c                        |   13
-rw-r--r--  drivers/char/ipmi/ipmi_devintf.c               |    6
-rw-r--r--  drivers/char/ipmi/ipmi_msghandler.c            |  102
-rw-r--r--  drivers/char/ipmi/ipmi_si_intf.c               |  121
-rw-r--r--  drivers/char/ipmi/ipmi_ssif.c                  |    6
-rw-r--r--  drivers/firmware/efi/libstub/efi-stub-helper.c |   16
-rw-r--r--  drivers/platform/x86/Kconfig                   |   25
-rw-r--r--  fs/binfmt_elf.c                                |    5
-rw-r--r--  include/linux/compiler.h                       |    6
-rw-r--r--  include/linux/kdb.h                            |    8
-rw-r--r--  include/linux/sched.h                          |   10
-rw-r--r--  kernel/debug/debug_core.c                      |   19
-rw-r--r--  kernel/debug/kdb/kdb_io.c                      |   46
-rw-r--r--  kernel/debug/kdb/kdb_main.c                    |   16
-rw-r--r--  kernel/debug/kdb/kdb_private.h                 |    4
-rw-r--r--  kernel/locking/rtmutex.c                       |    3
-rw-r--r--  kernel/printk/printk.c                         |    2
-rw-r--r--  kernel/rcu/tree_plugin.h                       |    1
-rw-r--r--  kernel/sched/auto_group.c                      |    6
-rw-r--r--  kernel/sched/completion.c                      |   19
-rw-r--r--  kernel/sched/core.c                            |  113
-rw-r--r--  kernel/sched/deadline.c                        |   33
-rw-r--r--  kernel/sched/sched.h                           |   76
-rw-r--r--  kernel/time/ntp.c                              |   10

64 files changed, 1978 insertions(+), 740 deletions(-)
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
index 2428cc04dbc8..f3abca7ec53d 100644
--- a/Documentation/DocBook/kgdb.tmpl
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -197,6 +197,7 @@
 may be configured as a kernel built-in or a kernel loadable module.
 You can only make use of <constant>kgdbwait</constant> and early
 debugging if you build kgdboc into the kernel as a built-in.
+</para>
 <para>Optionally you can elect to activate kms (Kernel Mode
 Setting) integration. When you use kms with kgdboc and you have a
 video driver that has atomic mode setting hooks, it is possible to
@@ -206,7 +207,6 @@
 crashes or doing analysis of memory with kdb while allowing the
 full graphics console applications to run.
 </para>
-</para>
 <sect2 id="kgdbocArgs">
 <title>kgdboc arguments</title>
 <para>Usage: <constant>kgdboc=[kms][[,]kbd][[,]serial_device][,baud]</constant></para>
@@ -284,7 +284,6 @@
 </listitem>
 </orderedlist>
 </para>
-</sect3>
 <para>NOTE: Kgdboc does not support interrupting the target via the
 gdb remote protocol. You must manually send a sysrq-g unless you
 have a proxy that splits console output to a terminal program.
@@ -305,6 +304,7 @@
 as well as on the initial connect, or to use a debugger proxy that
 allows an unmodified gdb to do the debugging.
 </para>
+</sect3>
 </sect2>
 </sect1>
 <sect1 id="kgdbwait">
@@ -350,12 +350,12 @@
 </para>
 </listitem>
 </orderedlist>
+</para>
 <para>IMPORTANT NOTE: You cannot use kgdboc + kgdbcon on a tty that is an
 active system console. An example of incorrect usage is <constant>console=ttyS0,115200 kgdboc=ttyS0 kgdbcon</constant>
 </para>
 <para>It is possible to use this option with kgdboc on a tty that is not a system console.
 </para>
-</para>
 </sect1>
 <sect1 id="kgdbreboot">
 <title>Run time parameter: kgdbreboot</title>
diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt
index 199f453cb4de..82fbdbc1e0b0 100644
--- a/Documentation/x86/zero-page.txt
+++ b/Documentation/x86/zero-page.txt
@@ -3,7 +3,7 @@ protocol of kernel. These should be filled by bootloader or 16-bit
 real-mode setup code of the kernel. References/settings to it mainly
 are in:
 
- arch/x86/include/asm/bootparam.h
+ arch/x86/include/uapi/asm/bootparam.h
 
 
 Offset	Proto	Name		Meaning
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index c4fbb9527c5c..b1453a2ae1ca 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -18,15 +18,15 @@ struct cpu_topology_s390 {
 	cpumask_t book_mask;
 };
 
-extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
+DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
 
-#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
-#define topology_thread_id(cpu) (cpu_topology[cpu].thread_id)
-#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_mask)
-#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
-#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_mask)
-#define topology_book_id(cpu) (cpu_topology[cpu].book_id)
-#define topology_book_cpumask(cpu) (&cpu_topology[cpu].book_mask)
+#define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id)
+#define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id)
+#define topology_thread_cpumask(cpu) (&per_cpu(cpu_topology, cpu).thread_mask)
+#define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id)
+#define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask)
+#define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id)
+#define topology_book_cpumask(cpu) (&per_cpu(cpu_topology, cpu).book_mask)
 
 #define mc_capable() 1
 
@@ -51,14 +51,6 @@ static inline void topology_expect_change(void) { }
 #define POLARIZATION_VM		(2)
 #define POLARIZATION_VH		(3)
 
-#ifdef CONFIG_SCHED_BOOK
-void s390_init_cpu_topology(void);
-#else
-static inline void s390_init_cpu_topology(void)
-{
-};
-#endif
-
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_S390_TOPOLOGY_H */
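NOTE: the hunk above replaces an NR_CPUS-sized array with per-CPU data, so
every accessor goes through per_cpu(). A minimal sketch of the
DEFINE_PER_CPU/per_cpu pattern it relies on — struct and names here are
illustrative, not part of this patch:

	#include <linux/percpu.h>

	struct example_topology {
		int socket_id;
	};

	/* One instance per possible CPU, placed in the per-CPU data area. */
	static DEFINE_PER_CPU(struct example_topology, example_topology);

	static int example_socket_id(int cpu)
	{
		/* per_cpu() selects the named CPU's copy of the variable. */
		return per_cpu(example_topology, cpu).socket_id;
	}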
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 632fa06ea162..0969d113b3d6 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -91,12 +91,9 @@ static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
 {
 	if (level >= CACHE_MAX_LEVEL)
 		return CACHE_TYPE_NOCACHE;
-
 	ci += level;
-
 	if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE)
 		return CACHE_TYPE_NOCACHE;
-
 	return cache_type_map[ci->type];
 }
 
@@ -111,23 +108,19 @@ static inline unsigned long ecag(int ai, int li, int ti)
 }
 
 static void ci_leaf_init(struct cacheinfo *this_leaf, int private,
-			 enum cache_type type, unsigned int level)
+			 enum cache_type type, unsigned int level, int cpu)
 {
 	int ti, num_sets;
-	int cpu = smp_processor_id();
 
 	if (type == CACHE_TYPE_INST)
 		ti = CACHE_TI_INSTRUCTION;
 	else
 		ti = CACHE_TI_UNIFIED;
-
 	this_leaf->level = level + 1;
 	this_leaf->type = type;
 	this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
-	this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY,
-						level, ti);
+	this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
 	this_leaf->size = ecag(EXTRACT_SIZE, level, ti);
-
 	num_sets = this_leaf->size / this_leaf->coherency_line_size;
 	num_sets /= this_leaf->ways_of_associativity;
 	this_leaf->number_of_sets = num_sets;
@@ -145,7 +138,6 @@ int init_cache_level(unsigned int cpu)
 
 	if (!this_cpu_ci)
 		return -EINVAL;
-
 	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
 	do {
 		ctype = get_cache_type(&ct.ci[0], level);
@@ -154,34 +146,31 @@ int init_cache_level(unsigned int cpu)
 		/* Separate instruction and data caches */
 		leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
 	} while (++level < CACHE_MAX_LEVEL);
-
 	this_cpu_ci->num_levels = level;
 	this_cpu_ci->num_leaves = leaves;
-
 	return 0;
 }
 
 int populate_cache_leaves(unsigned int cpu)
 {
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
 	unsigned int level, idx, pvt;
 	union cache_topology ct;
 	enum cache_type ctype;
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
 
 	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
 	for (idx = 0, level = 0; level < this_cpu_ci->num_levels &&
 	     idx < this_cpu_ci->num_leaves; idx++, level++) {
 		if (!this_leaf)
 			return -EINVAL;
-
 		pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0;
 		ctype = get_cache_type(&ct.ci[0], level);
 		if (ctype == CACHE_TYPE_SEPARATE) {
-			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level);
-			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level);
+			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level, cpu);
+			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level, cpu);
 		} else {
-			ci_leaf_init(this_leaf++, pvt, ctype, level);
+			ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
 		}
 	}
 	return 0;
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 70a329450901..4427ab7ac23a 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -393,17 +393,19 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
 	if (test_facility(129))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
-	if (test_facility(128))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
 #endif
 }
 
-static int __init nocad_setup(char *str)
+static int __init cad_setup(char *str)
 {
-	S390_lowcore.machine_flags &= ~MACHINE_FLAG_CAD;
+	int val;
+
+	get_option(&str, &val);
+	if (val && test_facility(128))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
 	return 0;
 }
-early_param("nocad", nocad_setup);
+early_param("cad", cad_setup);
 
 static int __init cad_init(void)
 {
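NOTE: the hunk above replaces the old "nocad" kill switch with a "cad=<n>"
integer parameter parsed via get_option(). The same early_param pattern in
isolation — parameter and variable names are hypothetical:

	#include <linux/kernel.h>
	#include <linux/init.h>

	static bool example_enabled;

	/* Parses "example=<int>" from the early kernel command line. */
	static int __init example_setup(char *str)
	{
		int val;

		get_option(&str, &val);		/* leaves val == 0 if absent */
		example_enabled = val != 0;
		return 0;
	}
	early_param("example", example_setup);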
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index bfac77ada4f2..a5ea8bc17cb3 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -909,7 +909,6 @@ void __init setup_arch(char **cmdline_p)
 	setup_lowcore();
 	smp_fill_possible_mask();
 	cpu_init();
-	s390_init_cpu_topology();
 
 	/*
 	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a668993ff577..db8f1115a3bf 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -59,14 +59,13 @@ enum {
 	CPU_STATE_CONFIGURED,
 };
 
+static DEFINE_PER_CPU(struct cpu *, cpu_device);
+
 struct pcpu {
-	struct cpu *cpu;
 	struct _lowcore *lowcore;	/* lowcore page(s) for the cpu */
-	unsigned long async_stack;	/* async stack for the cpu */
-	unsigned long panic_stack;	/* panic stack for the cpu */
 	unsigned long ec_mask;		/* bit mask for ec_xxx functions */
-	int state;			/* physical cpu state */
-	int polarization;		/* physical polarization */
+	signed char state;		/* physical cpu state */
+	signed char polarization;	/* physical polarization */
 	u16 address;			/* physical cpu address */
 };
 
@@ -173,25 +172,30 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
 	pcpu_sigp_retry(pcpu, order, 0);
 }
 
+#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+
 static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 {
+	unsigned long async_stack, panic_stack;
 	struct _lowcore *lc;
 
 	if (pcpu != &pcpu_devices[0]) {
 		pcpu->lowcore = (struct _lowcore *)
 			__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
-		pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
-		pcpu->panic_stack = __get_free_page(GFP_KERNEL);
-		if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack)
+		async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+		panic_stack = __get_free_page(GFP_KERNEL);
+		if (!pcpu->lowcore || !panic_stack || !async_stack)
 			goto out;
+	} else {
+		async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
+		panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
 	}
 	lc = pcpu->lowcore;
 	memcpy(lc, &S390_lowcore, 512);
 	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
-	lc->async_stack = pcpu->async_stack + ASYNC_SIZE
-		- STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
-	lc->panic_stack = pcpu->panic_stack + PAGE_SIZE
-		- STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+	lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
+	lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
 	lc->cpu_nr = cpu;
 	lc->spinlock_lockval = arch_spin_lockval(cpu);
 #ifndef CONFIG_64BIT
@@ -212,8 +216,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	return 0;
 out:
 	if (pcpu != &pcpu_devices[0]) {
-		free_page(pcpu->panic_stack);
-		free_pages(pcpu->async_stack, ASYNC_ORDER);
+		free_page(panic_stack);
+		free_pages(async_stack, ASYNC_ORDER);
 		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
 	}
 	return -ENOMEM;
@@ -235,11 +239,11 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
 #else
 	vdso_free_per_cpu(pcpu->lowcore);
 #endif
-	if (pcpu != &pcpu_devices[0]) {
-		free_page(pcpu->panic_stack);
-		free_pages(pcpu->async_stack, ASYNC_ORDER);
-		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
-	}
+	if (pcpu == &pcpu_devices[0])
+		return;
+	free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
+	free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
+	free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
 }
 
 #endif /* CONFIG_HOTPLUG_CPU */
@@ -366,7 +370,8 @@ void smp_call_online_cpu(void (*func)(void *), void *data)
 void smp_call_ipl_cpu(void (*func)(void *), void *data)
 {
 	pcpu_delegate(&pcpu_devices[0], func, data,
-		      pcpu_devices->panic_stack + PAGE_SIZE);
+		      pcpu_devices->lowcore->panic_stack -
+		      PANIC_FRAME_OFFSET + PAGE_SIZE);
 }
 
 int smp_find_processor_id(u16 address)
@@ -935,10 +940,6 @@ void __init smp_prepare_boot_cpu(void)
 	pcpu->state = CPU_STATE_CONFIGURED;
 	pcpu->address = stap();
 	pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
-	pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE
-		+ STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-	pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE
-		+ STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
 	S390_lowcore.percpu_offset = __per_cpu_offset[0];
 	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
 	set_cpu_present(0, true);
@@ -1078,8 +1079,7 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
 			  void *hcpu)
 {
 	unsigned int cpu = (unsigned int)(long)hcpu;
-	struct cpu *c = pcpu_devices[cpu].cpu;
-	struct device *s = &c->dev;
+	struct device *s = &per_cpu(cpu_device, cpu)->dev;
 	int err = 0;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
@@ -1102,7 +1102,7 @@ static int smp_add_present_cpu(int cpu)
 	c = kzalloc(sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return -ENOMEM;
-	pcpu_devices[cpu].cpu = c;
+	per_cpu(cpu_device, cpu) = c;
 	s = &c->dev;
 	c->hotpluggable = 1;
 	rc = register_cpu(c, cpu);
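NOTE: ASYNC_FRAME_OFFSET/PANIC_FRAME_OFFSET above encode the distance from a
stack allocation's base to the initial stack pointer stored in the lowcore,
so the base can be recomputed at free time instead of being cached in
struct pcpu. A sketch of the arithmetic — the constants are stand-ins, not
the real s390 values:

	/* Stand-ins for ASYNC_SIZE, STACK_FRAME_OVERHEAD and __PT_SIZE. */
	#define STACK_SIZE	(4UL * 4096)
	#define FRAME_OVERHEAD	96UL
	#define PT_REGS_SIZE	160UL
	#define FRAME_OFFSET	(STACK_SIZE - FRAME_OVERHEAD - PT_REGS_SIZE)

	/* What pcpu_alloc_lowcore() stores in lc->async_stack ... */
	static unsigned long initial_sp(unsigned long base)
	{
		return base + FRAME_OFFSET;
	}

	/* ... and how pcpu_free_lowcore() recovers the allocation base. */
	static unsigned long alloc_base(unsigned long sp)
	{
		return sp - FRAME_OFFSET;
	}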
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 24ee33f1af24..14da43b801d9 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -7,14 +7,14 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/workqueue.h>
-#include <linux/bootmem.h>
 #include <linux/cpuset.h>
 #include <linux/device.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
@@ -42,8 +42,8 @@ static DEFINE_SPINLOCK(topology_lock);
 static struct mask_info socket_info;
 static struct mask_info book_info;
 
-struct cpu_topology_s390 cpu_topology[NR_CPUS];
-EXPORT_SYMBOL_GPL(cpu_topology);
+DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
 
 static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
 {
@@ -90,15 +90,15 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
 		if (lcpu < 0)
 			continue;
 		for (i = 0; i <= smp_cpu_mtid; i++) {
-			cpu_topology[lcpu + i].book_id = book->id;
-			cpu_topology[lcpu + i].core_id = rcore;
-			cpu_topology[lcpu + i].thread_id = lcpu + i;
+			per_cpu(cpu_topology, lcpu + i).book_id = book->id;
+			per_cpu(cpu_topology, lcpu + i).core_id = rcore;
+			per_cpu(cpu_topology, lcpu + i).thread_id = lcpu + i;
 			cpumask_set_cpu(lcpu + i, &book->mask);
 			cpumask_set_cpu(lcpu + i, &socket->mask);
 			if (one_socket_per_cpu)
-				cpu_topology[lcpu + i].socket_id = rcore;
+				per_cpu(cpu_topology, lcpu + i).socket_id = rcore;
 			else
-				cpu_topology[lcpu + i].socket_id = socket->id;
+				per_cpu(cpu_topology, lcpu + i).socket_id = socket->id;
 			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
 		}
 		if (one_socket_per_cpu)
@@ -249,14 +249,14 @@ static void update_cpu_masks(void)
 
 	spin_lock_irqsave(&topology_lock, flags);
 	for_each_possible_cpu(cpu) {
-		cpu_topology[cpu].thread_mask = cpu_thread_map(cpu);
-		cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu);
-		cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu);
+		per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
+		per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
+		per_cpu(cpu_topology, cpu).book_mask = cpu_group_map(&book_info, cpu);
 		if (!MACHINE_HAS_TOPOLOGY) {
-			cpu_topology[cpu].thread_id = cpu;
-			cpu_topology[cpu].core_id = cpu;
-			cpu_topology[cpu].socket_id = cpu;
-			cpu_topology[cpu].book_id = cpu;
+			per_cpu(cpu_topology, cpu).thread_id = cpu;
+			per_cpu(cpu_topology, cpu).core_id = cpu;
+			per_cpu(cpu_topology, cpu).socket_id = cpu;
+			per_cpu(cpu_topology, cpu).book_id = cpu;
 		}
 	}
 	spin_unlock_irqrestore(&topology_lock, flags);
@@ -334,50 +334,6 @@ void topology_expect_change(void)
 	set_topology_timer();
 }
 
-static int __init early_parse_topology(char *p)
-{
-	if (strncmp(p, "off", 3))
-		return 0;
-	topology_enabled = 0;
-	return 0;
-}
-early_param("topology", early_parse_topology);
-
-static void __init alloc_masks(struct sysinfo_15_1_x *info,
-			       struct mask_info *mask, int offset)
-{
-	int i, nr_masks;
-
-	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
-	for (i = 0; i < info->mnest - offset; i++)
-		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
-	nr_masks = max(nr_masks, 1);
-	for (i = 0; i < nr_masks; i++) {
-		mask->next = alloc_bootmem_align(
-			roundup_pow_of_two(sizeof(struct mask_info)),
-			roundup_pow_of_two(sizeof(struct mask_info)));
-		mask = mask->next;
-	}
-}
-
-void __init s390_init_cpu_topology(void)
-{
-	struct sysinfo_15_1_x *info;
-	int i;
-
-	if (!MACHINE_HAS_TOPOLOGY)
-		return;
-	tl_info = alloc_bootmem_pages(PAGE_SIZE);
-	info = tl_info;
-	store_topology(info);
-	pr_info("The CPU configuration topology of the machine is:");
-	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
-		printk(KERN_CONT " %d", info->mag[i]);
-	printk(KERN_CONT " / %d\n", info->mnest);
-	alloc_masks(info, &socket_info, 1);
-	alloc_masks(info, &book_info, 2);
-}
-
 static int cpu_management;
 
 static ssize_t dispatching_show(struct device *dev,
@@ -467,20 +423,29 @@ int topology_cpu_init(struct cpu *cpu)
 
 const struct cpumask *cpu_thread_mask(int cpu)
 {
-	return &cpu_topology[cpu].thread_mask;
+	return &per_cpu(cpu_topology, cpu).thread_mask;
 }
 
 
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
-	return &cpu_topology[cpu].core_mask;
+	return &per_cpu(cpu_topology, cpu).core_mask;
 }
 
 static const struct cpumask *cpu_book_mask(int cpu)
 {
-	return &cpu_topology[cpu].book_mask;
+	return &per_cpu(cpu_topology, cpu).book_mask;
 }
 
+static int __init early_parse_topology(char *p)
+{
+	if (strncmp(p, "off", 3))
+		return 0;
+	topology_enabled = 0;
+	return 0;
+}
+early_param("topology", early_parse_topology);
+
 static struct sched_domain_topology_level s390_topology[] = {
 	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
@@ -489,6 +454,42 @@ static struct sched_domain_topology_level s390_topology[] = {
 	{ NULL, },
 };
 
+static void __init alloc_masks(struct sysinfo_15_1_x *info,
+			       struct mask_info *mask, int offset)
+{
+	int i, nr_masks;
+
+	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
+	for (i = 0; i < info->mnest - offset; i++)
+		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
+	nr_masks = max(nr_masks, 1);
+	for (i = 0; i < nr_masks; i++) {
+		mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL);
+		mask = mask->next;
+	}
+}
+
+static int __init s390_topology_init(void)
+{
+	struct sysinfo_15_1_x *info;
+	int i;
+
+	if (!MACHINE_HAS_TOPOLOGY)
+		return 0;
+	tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL);
+	info = tl_info;
+	store_topology(info);
+	pr_info("The CPU configuration topology of the machine is:");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		printk(KERN_CONT " %d", info->mag[i]);
+	printk(KERN_CONT " / %d\n", info->mnest);
+	alloc_masks(info, &socket_info, 1);
+	alloc_masks(info, &book_info, 2);
+	set_sched_topology(s390_topology);
+	return 0;
+}
+early_initcall(s390_topology_init);
+
 static int __init topology_init(void)
 {
 	if (MACHINE_HAS_TOPOLOGY)
@@ -498,10 +499,3 @@ static int __init topology_init(void)
 	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
 }
 device_initcall(topology_init);
-
-static int __init early_topology_init(void)
-{
-	set_sched_topology(s390_topology);
-	return 0;
-}
-early_initcall(early_topology_init);
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 7699e735ae28..61541fb93dc6 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -25,9 +25,7 @@ __kernel_clock_gettime:
 	je	4f
 	cghi	%r2,__CLOCK_REALTIME
 	je	5f
-	cghi	%r2,__CLOCK_THREAD_CPUTIME_ID
-	je	9f
-	cghi	%r2,-2		/* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+	cghi	%r2,-3		/* Per-thread CPUCLOCK with PID=0, VIRT=1 */
 	je	9f
 	cghi	%r2,__CLOCK_MONOTONIC_COARSE
 	je	3f
@@ -106,7 +104,7 @@ __kernel_clock_gettime:
 	aghi	%r15,16
 	br	%r14
 
-	/* CLOCK_THREAD_CPUTIME_ID for this thread */
+	/* CPUCLOCK_VIRT for this thread */
9:	icm	%r0,15,__VDSO_ECTG_OK(%r5)
 	jz	12f
 	ear	%r2,%a4
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index d008f638b2cd..179a2c20b01f 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -183,7 +183,10 @@ unsigned long randomize_et_dyn(void)
 {
 	unsigned long base;
 
-	base = (STACK_TOP / 3 * 2) & (~mmap_align_mask << PAGE_SHIFT);
+	base = STACK_TOP / 3 * 2;
+	if (!is_32bit_task())
+		/* Align to 4GB */
+		base &= ~((1UL << 32) - 1);
 	return base + mmap_rnd();
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index eb1cf898ed3c..c2fb8a87dccb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -488,6 +488,22 @@ config X86_INTEL_MID
 	  Intel MID platforms are based on an Intel processor and chipset which
 	  consume less power than most of the x86 derivatives.
 
+config X86_INTEL_QUARK
+	bool "Intel Quark platform support"
+	depends on X86_32
+	depends on X86_EXTENDED_PLATFORM
+	depends on X86_PLATFORM_DEVICES
+	depends on X86_TSC
+	depends on PCI
+	depends on PCI_GOANY
+	depends on X86_IO_APIC
+	select IOSF_MBI
+	select INTEL_IMR
+	---help---
+	  Select to include support for Quark X1000 SoC.
+	  Say Y here if you have a Quark based system such as the Arduino
+	  compatible Intel Galileo.
+
 config X86_INTEL_LPSS
 	bool "Intel Low Power Subsystem Support"
 	depends on ACPI
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 61bd2ad94281..20028da8ae18 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -313,6 +313,19 @@ config DEBUG_NMI_SELFTEST
 
 	  If unsure, say N.
 
+config DEBUG_IMR_SELFTEST
+	bool "Isolated Memory Region self test"
+	default n
+	depends on INTEL_IMR
+	---help---
+	  This option enables automated sanity testing of the IMR code.
+	  Some simple tests are run to verify IMR bounds checking, alignment
+	  and overlapping. This option is really only useful if you are
+	  debugging an IMR memory map or are modifying the IMR code and want to
+	  test your changes.
+
+	  If unsure say N here.
+
 config X86_DEBUG_STATIC_CPU_HAS
 	bool "Debug alternatives"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 843feb3eb20b..0a291cdfaf77 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -51,6 +51,7 @@ $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
 
 vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
 	$(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
 
 $(obj)/vmlinux: $(vmlinux-objs-y) FORCE
 	$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index bb1376381985..7083c16cccba 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -14,6 +14,13 @@
 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
 
+struct kaslr_setup_data {
+	__u64 next;
+	__u32 type;
+	__u32 len;
+	__u8 data[1];
+} kaslr_setup_data;
+
 #define I8254_PORT_CONTROL	0x43
 #define I8254_PORT_COUNTER0	0x40
 #define I8254_CMD_READBACK	0xC0
@@ -295,7 +302,29 @@ static unsigned long find_random_addr(unsigned long minimum,
 	return slots_fetch_random();
 }
 
-unsigned char *choose_kernel_location(unsigned char *input,
+static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled)
+{
+	struct setup_data *data;
+
+	kaslr_setup_data.type = SETUP_KASLR;
+	kaslr_setup_data.len = 1;
+	kaslr_setup_data.next = 0;
+	kaslr_setup_data.data[0] = enabled;
+
+	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+	while (data && data->next)
+		data = (struct setup_data *)(unsigned long)data->next;
+
+	if (data)
+		data->next = (unsigned long)&kaslr_setup_data;
+	else
+		params->hdr.setup_data = (unsigned long)&kaslr_setup_data;
+
+}
+
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
@@ -306,14 +335,17 @@ unsigned char *choose_kernel_location(unsigned char *input,
 #ifdef CONFIG_HIBERNATION
 	if (!cmdline_find_option_bool("kaslr")) {
 		debug_putstr("KASLR disabled by default...\n");
+		add_kaslr_setup_data(params, 0);
 		goto out;
 	}
 #else
 	if (cmdline_find_option_bool("nokaslr")) {
 		debug_putstr("KASLR disabled by cmdline...\n");
+		add_kaslr_setup_data(params, 0);
 		goto out;
 	}
 #endif
+	add_kaslr_setup_data(params, 1);
 
 	/* Record the various known unsafe memory ranges. */
 	mem_avoid_init((unsigned long)input, input_size,
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
index 7ff3632806b1..99494dff2113 100644
--- a/arch/x86/boot/compressed/efi_stub_64.S
+++ b/arch/x86/boot/compressed/efi_stub_64.S
@@ -3,28 +3,3 @@
 #include <asm/processor-flags.h>
 
 #include "../../platform/efi/efi_stub_64.S"
-
-#ifdef CONFIG_EFI_MIXED
-	.code64
-	.text
-ENTRY(efi64_thunk)
-	push	%rbp
-	push	%rbx
-
-	subq	$16, %rsp
-	leaq	efi_exit32(%rip), %rax
-	movl	%eax, 8(%rsp)
-	leaq	efi_gdt64(%rip), %rax
-	movl	%eax, 4(%rsp)
-	movl	%eax, 2(%rax)		/* Fixup the gdt base address */
-	leaq	efi32_boot_gdt(%rip), %rax
-	movl	%eax, (%rsp)
-
-	call	__efi64_thunk
-
-	addq	$16, %rsp
-	pop	%rbx
-	pop	%rbp
-	ret
-ENDPROC(efi64_thunk)
-#endif /* CONFIG_EFI_MIXED */
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
new file mode 100644
index 000000000000..630384a4c14a
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
+ *
+ * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+ *
+ * Because this thunking occurs before ExitBootServices() we have to
+ * restore the firmware's 32-bit GDT before we make EFI service calls,
+ * since the firmware's 32-bit IDT is still currently installed and it
+ * needs to be able to service interrupts.
+ *
+ * On the plus side, we don't have to worry about mangling 64-bit
+ * addresses into 32-bits because we're executing with an identity
+ * mapped pagetable and haven't transitioned to 64-bit virtual addresses
+ * yet.
+ */
+
+#include <linux/linkage.h>
+#include <asm/msr.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+	.code64
+	.text
+ENTRY(efi64_thunk)
+	push	%rbp
+	push	%rbx
+
+	subq	$8, %rsp
+	leaq	efi_exit32(%rip), %rax
+	movl	%eax, 4(%rsp)
+	leaq	efi_gdt64(%rip), %rax
+	movl	%eax, (%rsp)
+	movl	%eax, 2(%rax)		/* Fixup the gdt base address */
+
+	movl	%ds, %eax
+	push	%rax
+	movl	%es, %eax
+	push	%rax
+	movl	%ss, %eax
+	push	%rax
+
+	/*
+	 * Convert x86-64 ABI params to i386 ABI
+	 */
+	subq	$32, %rsp
+	movl	%esi, 0x0(%rsp)
+	movl	%edx, 0x4(%rsp)
+	movl	%ecx, 0x8(%rsp)
+	movq	%r8, %rsi
+	movl	%esi, 0xc(%rsp)
+	movq	%r9, %rsi
+	movl	%esi, 0x10(%rsp)
+
+	sgdt	save_gdt(%rip)
+
+	leaq	1f(%rip), %rbx
+	movq	%rbx, func_rt_ptr(%rip)
+
+	/*
+	 * Switch to gdt with 32-bit segments. This is the firmware GDT
+	 * that was installed when the kernel started executing. This
+	 * pointer was saved at the EFI stub entry point in head_64.S.
+	 */
+	leaq	efi32_boot_gdt(%rip), %rax
+	lgdt	(%rax)
+
+	pushq	$__KERNEL_CS
+	leaq	efi_enter32(%rip), %rax
+	pushq	%rax
+	lretq
+
+1:	addq	$32, %rsp
+
+	lgdt	save_gdt(%rip)
+
+	pop	%rbx
+	movl	%ebx, %ss
+	pop	%rbx
+	movl	%ebx, %es
+	pop	%rbx
+	movl	%ebx, %ds
+
+	/*
+	 * Convert 32-bit status code into 64-bit.
+	 */
+	test	%rax, %rax
+	jz	1f
+	movl	%eax, %ecx
+	andl	$0x0fffffff, %ecx
+	andl	$0xf0000000, %eax
+	shl	$32, %rax
+	or	%rcx, %rax
+1:
+	addq	$8, %rsp
+	pop	%rbx
+	pop	%rbp
+	ret
+ENDPROC(efi64_thunk)
+
+ENTRY(efi_exit32)
+	movq	func_rt_ptr(%rip), %rax
+	push	%rax
+	mov	%rdi, %rax
+	ret
+ENDPROC(efi_exit32)
+
+	.code32
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+ENTRY(efi_enter32)
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %ss
+
+	/* Reload pgtables */
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+
+	/* Disable paging */
+	movl	%cr0, %eax
+	btrl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+
+	/* Disable long mode via EFER */
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	btrl	$_EFER_LME, %eax
+	wrmsr
+
+	call	*%edi
+
+	/* We must preserve return value */
+	movl	%eax, %edi
+
+	/*
+	 * Some firmware will return with interrupts enabled. Be sure to
+	 * disable them before we switch GDTs.
+	 */
+	cli
+
+	movl	56(%esp), %eax
+	movl	%eax, 2(%eax)
+	lgdtl	(%eax)
+
+	movl	%cr4, %eax
+	btsl	$(X86_CR4_PAE_BIT), %eax
+	movl	%eax, %cr4
+
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	btsl	$_EFER_LME, %eax
+	wrmsr
+
+	xorl	%eax, %eax
+	lldt	%ax
+
+	movl	60(%esp), %eax
+	pushl	$__KERNEL_CS
+	pushl	%eax
+
+	/* Enable paging */
+	movl	%cr0, %eax
+	btsl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+	lret
+ENDPROC(efi_enter32)
+
+	.data
+	.balign	8
+	.global	efi32_boot_gdt
+efi32_boot_gdt:	.word	0
+		.quad	0
+
+save_gdt:	.word	0
+		.quad	0
+func_rt_ptr:	.quad	0
+
+	.global efi_gdt64
+efi_gdt64:
+	.word	efi_gdt64_end - efi_gdt64
+	.long	0			/* Filled out by user */
+	.word	0
+	.quad	0x0000000000000000	/* NULL descriptor */
+	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
+	.quad	0x0080890000000000	/* TS descriptor */
+	.quad	0x0000000000000000	/* TS continued */
+efi_gdt64_end:
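NOTE: the "convert 32-bit status code into 64-bit" block in efi64_thunk above
splits an EFI_STATUS: the error-class nibble moves from bits 31:28 to bits
63:60 while the low code bits stay put. The same transform in C, as a sketch:

	#include <stdint.h>

	static uint64_t efi_status_32_to_64(uint32_t status)
	{
		if (status == 0)	/* EFI_SUCCESS is unchanged */
			return 0;
		return ((uint64_t)(status & 0xf0000000) << 32) |
		       (status & 0x0fffffff);
	}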
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index a950864a64da..5903089c818f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -401,7 +401,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 	 * the entire decompressed kernel plus relocation table, or the
 	 * entire decompressed kernel plus .bss and .brk sections.
 	 */
-	output = choose_kernel_location(input_data, input_len, output,
+	output = choose_kernel_location(real_mode, input_data, input_len,
+					output,
 					output_len > run_size ? output_len
 							      : run_size);
 
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 04477d68403f..ee3576b2666b 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -57,7 +57,8 @@ int cmdline_find_option_bool(const char *option);
 
 #if CONFIG_RANDOMIZE_BASE
 /* aslr.c */
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size);
@@ -65,7 +66,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
 bool has_cpuflag(int flag);
 #else
 static inline
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 92003f3c8a42..efc3b22d896e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -213,7 +213,15 @@ void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+	return -1;
+}
+#else
 extern int apic_force_enable(unsigned long addr);
+#endif
 
 extern int apic_bsp_setup(bool upmode);
 extern void apic_ap_setup(void);
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
new file mode 100644
index 000000000000..cd2ce4068441
--- /dev/null
+++ b/arch/x86/include/asm/imr.h
@@ -0,0 +1,60 @@
+/*
+ * imr.h: Isolated Memory Region API
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _IMR_H
+#define _IMR_H
+
+#include <linux/types.h>
+
+/*
+ * IMR agent access mask bits
+ * See section 12.7.4.7 from quark-x1000-datasheet.pdf for register
+ * definitions.
+ */
+#define IMR_ESRAM_FLUSH		BIT(31)
+#define IMR_CPU_SNOOP		BIT(30)		/* Applicable only to write */
+#define IMR_RMU			BIT(29)
+#define IMR_VC1_SAI_ID3		BIT(15)
+#define IMR_VC1_SAI_ID2		BIT(14)
+#define IMR_VC1_SAI_ID1		BIT(13)
+#define IMR_VC1_SAI_ID0		BIT(12)
+#define IMR_VC0_SAI_ID3		BIT(11)
+#define IMR_VC0_SAI_ID2		BIT(10)
+#define IMR_VC0_SAI_ID1		BIT(9)
+#define IMR_VC0_SAI_ID0		BIT(8)
+#define IMR_CPU_0		BIT(1)		/* SMM mode */
+#define IMR_CPU			BIT(0)		/* Non SMM mode */
+#define IMR_ACCESS_NONE		0
+
+/*
+ * Read/Write access-all bits here include some reserved bits
+ * These are the values firmware uses and are accepted by hardware.
+ * The kernel defines read/write access-all in the same way as firmware
+ * in order to have a consistent and crisp definition across firmware,
+ * bootloader and kernel.
+ */
+#define IMR_READ_ACCESS_ALL	0xBFFFFFFF
+#define IMR_WRITE_ACCESS_ALL	0xFFFFFFFF
+
+/* Number of IMRs provided by Quark X1000 SoC */
+#define QUARK_X1000_IMR_MAX	0x08
+#define QUARK_X1000_IMR_REGBASE	0x40
+
+/* IMR alignment bits - only bits 31:10 are checked for IMR validity */
+#define IMR_ALIGN		0x400
+#define IMR_MASK		(IMR_ALIGN - 1)
+
+int imr_add_range(phys_addr_t base, size_t size,
+		  unsigned int rmask, unsigned int wmask, bool lock);
+
+int imr_remove_range(phys_addr_t base, size_t size);
+
+#endif /* _IMR_H */
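NOTE: the header above is the whole IMR API surface — two calls plus access
mask bits. A hedged usage sketch, reserving a 1 KiB-aligned region for
CPU-only access without locking the IMR; the function and values are
illustrative, not from this series:

	#include <asm/imr.h>
	#include <linux/errno.h>

	static int example_imr_protect(phys_addr_t base, size_t size)
	{
		/* Hardware only checks bits 31:10, so enforce alignment. */
		if ((base & IMR_MASK) || (size & IMR_MASK))
			return -EINVAL;

		/* CPU may read and write; all other agents are excluded. */
		return imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
	}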
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index f97fbe3abb67..95e11f79f123 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,6 +51,8 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
+extern bool kaslr_enabled;
+
 static inline phys_addr_t get_max_mapped(void)
 {
 	return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 7050d864f520..cf87de3fc390 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key);
 
 static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
 {
-	set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
+	set_bit(0, (volatile unsigned long *)&lock->tickets.head);
 }
 
 #else /* !CONFIG_PARAVIRT_SPINLOCKS */
@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
 }
 
 #endif /* CONFIG_PARAVIRT_SPINLOCKS */
+static inline int __tickets_equal(__ticket_t one, __ticket_t two)
+{
+	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
+}
+
+static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
+							__ticket_t head)
+{
+	if (head & TICKET_SLOWPATH_FLAG) {
+		arch_spinlock_t old, new;
+
+		old.tickets.head = head;
+		new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
+		old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
+		new.tickets.tail = old.tickets.tail;
+
+		/* try to clear slowpath flag when there are no contenders */
+		cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
+	}
+}
 
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
-	return lock.tickets.head == lock.tickets.tail;
+	return __tickets_equal(lock.tickets.head, lock.tickets.tail);
 }
 
 /*
@@ -87,18 +107,21 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 	if (likely(inc.head == inc.tail))
 		goto out;
 
-	inc.tail &= ~TICKET_SLOWPATH_FLAG;
 	for (;;) {
 		unsigned count = SPIN_THRESHOLD;
 
 		do {
-			if (READ_ONCE(lock->tickets.head) == inc.tail)
-				goto out;
+			inc.head = READ_ONCE(lock->tickets.head);
+			if (__tickets_equal(inc.head, inc.tail))
+				goto clear_slowpath;
 			cpu_relax();
 		} while (--count);
 		__ticket_lock_spinning(lock, inc.tail);
 	}
-out:	barrier();	/* make sure nothing creeps before the lock is taken */
+clear_slowpath:
+	__ticket_check_and_clear_slowpath(lock, inc.head);
+out:
+	barrier();	/* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -106,56 +129,30 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 	arch_spinlock_t old, new;
 
 	old.tickets = READ_ONCE(lock->tickets);
-	if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
+	if (!__tickets_equal(old.tickets.head, old.tickets.tail))
 		return 0;
 
 	new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
+	new.head_tail &= ~TICKET_SLOWPATH_FLAG;
 
 	/* cmpxchg is a full barrier, so nothing can move before it */
 	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
-static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
-					    arch_spinlock_t old)
-{
-	arch_spinlock_t new;
-
-	BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
-	/* Perform the unlock on the "before" copy */
-	old.tickets.head += TICKET_LOCK_INC;
-
-	/* Clear the slowpath flag */
-	new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
-
-	/*
-	 * If the lock is uncontended, clear the flag - use cmpxchg in
-	 * case it changes behind our back though.
-	 */
-	if (new.tickets.head != new.tickets.tail ||
-	    cmpxchg(&lock->head_tail, old.head_tail,
-		    new.head_tail) != old.head_tail) {
-		/*
-		 * Lock still has someone queued for it, so wake up an
-		 * appropriate waiter.
-		 */
-		__ticket_unlock_kick(lock, old.tickets.head);
-	}
-}
-
 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	if (TICKET_SLOWPATH_FLAG &&
 	    static_key_false(&paravirt_ticketlocks_enabled)) {
-		arch_spinlock_t prev;
+		__ticket_t head;
 
-		prev = *lock;
-		add_smp(&lock->tickets.head, TICKET_LOCK_INC);
+		BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
 
-		/* add_smp() is a full mb() */
+		head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
 
-		if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
-			__ticket_unlock_slowpath(lock, prev);
+		if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
+			head &= ~TICKET_SLOWPATH_FLAG;
+			__ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
+		}
 	} else
 		__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
 }
@@ -164,14 +161,15 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return tmp.tail != tmp.head;
+	return !__tickets_equal(tmp.tail, tmp.head);
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
+	tmp.head &= ~TICKET_SLOWPATH_FLAG;
+	return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
 #define arch_spin_is_contended	arch_spin_is_contended
 
@@ -191,8 +189,8 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 		 * We need to check "unlocked" in a loop, tmp.head == head
 		 * can be false positive because of overflow.
 		 */
-		if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) ||
-		    tmp.head != head)
+		if (__tickets_equal(tmp.head, tmp.tail) ||
+		    !__tickets_equal(tmp.head, head))
 			break;
 
 		cpu_relax();
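NOTE: the recurring idiom in the hunk above is __tickets_equal(), which
compares head and tail while ignoring TICKET_SLOWPATH_FLAG now that the flag
is kept in the head. Its behavior restated standalone with an 8-bit ticket
type — a sketch, not the kernel's definitions:

	#include <stdint.h>

	typedef uint8_t ticket_t;		/* stand-in for __ticket_t */
	#define SLOWPATH_FLAG ((ticket_t)1)	/* TICKET_SLOWPATH_FLAG stand-in */

	static int tickets_equal(ticket_t one, ticket_t two)
	{
		/* Equal when all bits except the slowpath flag match. */
		return !((one ^ two) & ~SLOWPATH_FLAG);
	}

	/*
	 * Example: head = 0x09 (ticket 0x08 with the flag set), tail = 0x08.
	 * tickets_equal(0x09, 0x08) == 1, so the comparison sees the same
	 * ticket value whether or not the flag happens to be set.
	 */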
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 225b0988043a..44e6dd7e36a2 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -7,6 +7,7 @@
 #define SETUP_DTB			2
 #define SETUP_PCI			3
 #define SETUP_EFI			4
+#define SETUP_KASLR			5
 
 /* ram_size flags */
 #define RAMDISK_IMAGE_START_MASK	0x07FF
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ae97ed0873c6..3d525c6124f6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -613,6 +613,11 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
 {
 	int rc, irq, trigger, polarity;
 
+	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+		*irqp = gsi;
+		return 0;
+	}
+
 	rc = acpi_get_override_irq(gsi, &trigger, &polarity);
 	if (rc == 0) {
 		trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index c6826d1e8082..746e7fd08aad 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -196,6 +196,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
196 struct microcode_header_intel mc_header; 196 struct microcode_header_intel mc_header;
197 unsigned int mc_size; 197 unsigned int mc_size;
198 198
199 if (leftover < sizeof(mc_header)) {
200 pr_err("error! Truncated header in microcode data file\n");
201 break;
202 }
203
199 if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) 204 if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
200 break; 205 break;
201 206
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c
index ec9df6f9cd47..420eb933189c 100644
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ b/arch/x86/kernel/cpu/microcode/intel_early.c
@@ -321,7 +321,11 @@ get_matching_model_microcode(int cpu, unsigned long start,
321 unsigned int mc_saved_count = mc_saved_data->mc_saved_count; 321 unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
322 int i; 322 int i;
323 323
324 while (leftover) { 324 while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
325
326 if (leftover < sizeof(mc_header))
327 break;
328
325 mc_header = (struct microcode_header_intel *)ucode_ptr; 329 mc_header = (struct microcode_header_intel *)ucode_ptr;
326 330
327 mc_size = get_totalsize(mc_header); 331 mc_size = get_totalsize(mc_header);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 705ef8d48e2d..67b1cbe0093a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -302,6 +302,9 @@ int check_irq_vectors_for_cpu_disable(void)
302 irq = __this_cpu_read(vector_irq[vector]); 302 irq = __this_cpu_read(vector_irq[vector]);
303 if (irq >= 0) { 303 if (irq >= 0) {
304 desc = irq_to_desc(irq); 304 desc = irq_to_desc(irq);
305 if (!desc)
306 continue;
307
305 data = irq_desc_get_irq_data(desc); 308 data = irq_desc_get_irq_data(desc);
306 cpumask_copy(&affinity_new, data->affinity); 309 cpumask_copy(&affinity_new, data->affinity);
307 cpu_clear(this_cpu, affinity_new); 310 cpu_clear(this_cpu, affinity_new);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 98f654d466e5..6a1146ea4d4d 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -84,7 +84,7 @@ static volatile u32 twobyte_is_boostable[256 / 32] = {
84 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 84 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
85 /* ---------------------------------------------- */ 85 /* ---------------------------------------------- */
86 W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ 86 W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
87 W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */ 87 W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) , /* 10 */
88 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */ 88 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
89 W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ 89 W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
90 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ 90 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 94f643484300..e354cc6446ab 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -609,7 +609,7 @@ static inline void check_zero(void)
609 u8 ret; 609 u8 ret;
610 u8 old; 610 u8 old;
611 611
612 old = ACCESS_ONCE(zero_stats); 612 old = READ_ONCE(zero_stats);
613 if (unlikely(old)) { 613 if (unlikely(old)) {
614 ret = cmpxchg(&zero_stats, old, 0); 614 ret = cmpxchg(&zero_stats, old, 0);
615 /* This ensures only one fellow resets the stat */ 615 /* This ensures only one fellow resets the stat */
@@ -727,6 +727,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
727 int cpu; 727 int cpu;
728 u64 start; 728 u64 start;
729 unsigned long flags; 729 unsigned long flags;
730 __ticket_t head;
730 731
731 if (in_nmi()) 732 if (in_nmi())
732 return; 733 return;
@@ -768,11 +769,15 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
768 */ 769 */
769 __ticket_enter_slowpath(lock); 770 __ticket_enter_slowpath(lock);
770 771
772 /* make sure enter_slowpath, which is atomic does not cross the read */
773 smp_mb__after_atomic();
774
771 /* 775 /*
772 * check again make sure it didn't become free while 776 * check again make sure it didn't become free while
773 * we weren't looking. 777 * we weren't looking.
774 */ 778 */
775 if (ACCESS_ONCE(lock->tickets.head) == want) { 779 head = READ_ONCE(lock->tickets.head);
780 if (__tickets_equal(head, want)) {
776 add_stats(TAKEN_SLOW_PICKUP, 1); 781 add_stats(TAKEN_SLOW_PICKUP, 1);
777 goto out; 782 goto out;
778 } 783 }
@@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
803 add_stats(RELEASED_SLOW, 1); 808 add_stats(RELEASED_SLOW, 1);
804 for_each_cpu(cpu, &waiting_cpus) { 809 for_each_cpu(cpu, &waiting_cpus) {
805 const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); 810 const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
806 if (ACCESS_ONCE(w->lock) == lock && 811 if (READ_ONCE(w->lock) == lock &&
807 ACCESS_ONCE(w->want) == ticket) { 812 READ_ONCE(w->want) == ticket) {
808 add_stats(RELEASED_SLOW_KICKED, 1); 813 add_stats(RELEASED_SLOW_KICKED, 1);
809 kvm_kick_cpu(cpu); 814 kvm_kick_cpu(cpu);
810 break; 815 break;
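
The smp_mb__after_atomic() added above (mirrored in the Xen patch further down) orders the slowpath-flag store in __ticket_enter_slowpath() before the re-read of lock->tickets.head; without it the flag store and the head read could be reordered against a concurrent unlocker and a kick could be lost. A rough userspace analogy of the required store-fence-load pattern, using C11 atomics (the names here are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic unsigned char slowpath_flag;  /* stands in for TICKET_SLOWPATH_FLAG */
static _Atomic unsigned short head;          /* stands in for lock->tickets.head */

static bool waiter_should_block(unsigned short want)
{
	/* Publish the flag, fence (the smp_mb__after_atomic() above),
	 * then re-check the head; reordering these loses wakeups. */
	atomic_store_explicit(&slowpath_flag, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	return atomic_load_explicit(&head, memory_order_relaxed) != want;
}
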
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index d1ac80b72c72..9bbb9b35c144 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -47,21 +47,13 @@ do { \
47 47
48#ifdef CONFIG_RANDOMIZE_BASE 48#ifdef CONFIG_RANDOMIZE_BASE
49static unsigned long module_load_offset; 49static unsigned long module_load_offset;
50static int randomize_modules = 1;
51 50
52/* Mutex protects the module_load_offset. */ 51/* Mutex protects the module_load_offset. */
53static DEFINE_MUTEX(module_kaslr_mutex); 52static DEFINE_MUTEX(module_kaslr_mutex);
54 53
55static int __init parse_nokaslr(char *p)
56{
57 randomize_modules = 0;
58 return 0;
59}
60early_param("nokaslr", parse_nokaslr);
61
62static unsigned long int get_module_load_offset(void) 54static unsigned long int get_module_load_offset(void)
63{ 55{
64 if (randomize_modules) { 56 if (kaslr_enabled) {
65 mutex_lock(&module_kaslr_mutex); 57 mutex_lock(&module_kaslr_mutex);
66 /* 58 /*
67 * Calculate the module_load_offset the first time this 59 * Calculate the module_load_offset the first time this
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0a2421cca01f..98dc9317286e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -122,6 +122,8 @@
122unsigned long max_low_pfn_mapped; 122unsigned long max_low_pfn_mapped;
123unsigned long max_pfn_mapped; 123unsigned long max_pfn_mapped;
124 124
125bool __read_mostly kaslr_enabled = false;
126
125#ifdef CONFIG_DMI 127#ifdef CONFIG_DMI
126RESERVE_BRK(dmi_alloc, 65536); 128RESERVE_BRK(dmi_alloc, 65536);
127#endif 129#endif
@@ -425,6 +427,11 @@ static void __init reserve_initrd(void)
425} 427}
426#endif /* CONFIG_BLK_DEV_INITRD */ 428#endif /* CONFIG_BLK_DEV_INITRD */
427 429
430static void __init parse_kaslr_setup(u64 pa_data, u32 data_len)
431{
432 kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data));
433}
434
428static void __init parse_setup_data(void) 435static void __init parse_setup_data(void)
429{ 436{
430 struct setup_data *data; 437 struct setup_data *data;
@@ -450,6 +457,9 @@ static void __init parse_setup_data(void)
450 case SETUP_EFI: 457 case SETUP_EFI:
451 parse_efi_setup(pa_data, data_len); 458 parse_efi_setup(pa_data, data_len);
452 break; 459 break;
460 case SETUP_KASLR:
461 parse_kaslr_setup(pa_data, data_len);
462 break;
453 default: 463 default:
454 break; 464 break;
455 } 465 }
@@ -832,10 +842,14 @@ static void __init trim_low_memory_range(void)
832static int 842static int
833dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) 843dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
834{ 844{
835 pr_emerg("Kernel Offset: 0x%lx from 0x%lx " 845 if (kaslr_enabled)
836 "(relocation range: 0x%lx-0x%lx)\n", 846 pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
837 (unsigned long)&_text - __START_KERNEL, __START_KERNEL, 847 (unsigned long)&_text - __START_KERNEL,
838 __START_KERNEL_map, MODULES_VADDR-1); 848 __START_KERNEL,
849 __START_KERNEL_map,
850 MODULES_VADDR-1);
851 else
852 pr_emerg("Kernel Offset: disabled\n");
839 853
840 return 0; 854 return 0;
841} 855}
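
For context, the new SETUP_KASLR entry travels in the standard setup_data chain defined in bootparam.h, and the flag payload sits immediately after the header, which is the location the pa_data + sizeof(struct setup_data) arithmetic in parse_kaslr_setup() refers to:

/* Standard setup_data node layout from bootparam.h (shown for
 * reference); the KASLR flag byte lives in data[]. */
struct setup_data {
	__u64 next;    /* physical address of the next node, 0 ends the chain */
	__u32 type;    /* SETUP_KASLR for this entry */
	__u32 len;     /* length of data[] in bytes */
	__u8  data[0]; /* payload: non-zero when KASLR is active */
};
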
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 8b96a947021f..81f8adb0679e 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -66,27 +66,54 @@
66 * Good-instruction tables for 32-bit apps. This is non-const and volatile 66 * Good-instruction tables for 32-bit apps. This is non-const and volatile
67 * to keep gcc from statically optimizing it out, as variable_test_bit makes 67 * to keep gcc from statically optimizing it out, as variable_test_bit makes
68 * some versions of gcc to think only *(unsigned long*) is used. 68 * some versions of gcc to think only *(unsigned long*) is used.
69 *
70 * Opcodes we'll probably never support:
71 * 6c-6f - ins,outs. SEGVs if used in userspace
72 * e4-e7 - in,out imm. SEGVs if used in userspace
73 * ec-ef - in,out acc. SEGVs if used in userspace
74 * cc - int3. SIGTRAP if used in userspace
75 * ce - into. Not used in userspace - no kernel support to make it useful. SEGVs
76 * (why we support bound (62) then? it's similar, and similarly unused...)
77 * f1 - int1. SIGTRAP if used in userspace
78 * f4 - hlt. SEGVs if used in userspace
79 * fa - cli. SEGVs if used in userspace
80 * fb - sti. SEGVs if used in userspace
81 *
82 * Opcodes which need some work to be supported:
83 * 07,17,1f - pop es/ss/ds
84 * Normally not used in userspace, but would execute if used.
85 * Can cause GP or stack exception if tries to load wrong segment descriptor.
86 * We hesitate to run them under single step since kernel's handling
87 * of userspace single-stepping (TF flag) is fragile.
88 * We can easily refuse to support push es/cs/ss/ds (06/0e/16/1e)
89 * on the same grounds that they are never used.
90 * cd - int N.
91 * Used by userspace for "int 80" syscall entry. (Other "int N"
92 * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
93 * Not supported since kernel's handling of userspace single-stepping
94 * (TF flag) is fragile.
95 * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
69 */ 96 */
70#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) 97#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
71static volatile u32 good_insns_32[256 / 32] = { 98static volatile u32 good_insns_32[256 / 32] = {
72 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 99 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
73 /* ---------------------------------------------- */ 100 /* ---------------------------------------------- */
74 W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */ 101 W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 00 */
75 W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */ 102 W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
76 W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */ 103 W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
77 W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */ 104 W(0x30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */
78 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ 105 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
79 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ 106 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
80 W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ 107 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
81 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ 108 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
82 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ 109 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
83 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ 110 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
84 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ 111 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
85 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ 112 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
86 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ 113 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
87 W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ 114 W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
88 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ 115 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
89 W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ 116 W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
90 /* ---------------------------------------------- */ 117 /* ---------------------------------------------- */
91 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 118 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
92}; 119};
@@ -94,27 +121,61 @@ static volatile u32 good_insns_32[256 / 32] = {
94#define good_insns_32 NULL 121#define good_insns_32 NULL
95#endif 122#endif
96 123
97/* Good-instruction tables for 64-bit apps */ 124/* Good-instruction tables for 64-bit apps.
125 *
126 * Genuinely invalid opcodes:
127 * 06,07 - formerly push/pop es
128 * 0e - formerly push cs
129 * 16,17 - formerly push/pop ss
130 * 1e,1f - formerly push/pop ds
131 * 27,2f,37,3f - formerly daa/das/aaa/aas
132 * 60,61 - formerly pusha/popa
133 * 62 - formerly bound. EVEX prefix for AVX512 (not yet supported)
134 * 82 - formerly redundant encoding of Group1
135 * 9a - formerly call seg:ofs
136 * ce - formerly into
137 * d4,d5 - formerly aam/aad
138 * d6 - formerly undocumented salc
139 * ea - formerly jmp seg:ofs
140 *
141 * Opcodes we'll probably never support:
142 * 6c-6f - ins,outs. SEGVs if used in userspace
143 * e4-e7 - in,out imm. SEGVs if used in userspace
144 * ec-ef - in,out acc. SEGVs if used in userspace
145 * cc - int3. SIGTRAP if used in userspace
146 * f1 - int1. SIGTRAP if used in userspace
147 * f4 - hlt. SEGVs if used in userspace
148 * fa - cli. SEGVs if used in userspace
149 * fb - sti. SEGVs if used in userspace
150 *
151 * Opcodes which need some work to be supported:
152 * cd - int N.
153 * Used by userspace for "int 80" syscall entry. (Other "int N"
154 * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
155 * Not supported since kernel's handling of userspace single-stepping
156 * (TF flag) is fragile.
157 * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
158 */
98#if defined(CONFIG_X86_64) 159#if defined(CONFIG_X86_64)
99static volatile u32 good_insns_64[256 / 32] = { 160static volatile u32 good_insns_64[256 / 32] = {
100 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 161 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
101 /* ---------------------------------------------- */ 162 /* ---------------------------------------------- */
102 W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ 163 W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* 00 */
103 W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ 164 W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
104 W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ 165 W(0x20, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 20 */
105 W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ 166 W(0x30, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 30 */
106 W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ 167 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
107 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ 168 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
108 W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ 169 W(0x60, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
109 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ 170 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
110 W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ 171 W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
111 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ 172 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1) , /* 90 */
112 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ 173 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
113 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ 174 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
114 W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ 175 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
115 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ 176 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
116 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ 177 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0) | /* e0 */
117 W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ 178 W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
118 /* ---------------------------------------------- */ 179 /* ---------------------------------------------- */
119 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 180 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
120}; 181};
@@ -122,49 +183,55 @@ static volatile u32 good_insns_64[256 / 32] = {
122#define good_insns_64 NULL 183#define good_insns_64 NULL
123#endif 184#endif
124 185
125/* Using this for both 64-bit and 32-bit apps */ 186/* Using this for both 64-bit and 32-bit apps.
187 * Opcodes we don't support:
188 * 0f 00 - SLDT/STR/LLDT/LTR/VERR/VERW/-/- group. System insns
189 * 0f 01 - SGDT/SIDT/LGDT/LIDT/SMSW/-/LMSW/INVLPG group.
190 * Also encodes tons of other system insns if mod=11.
191 * Some are in fact non-system: xend, xtest, rdtscp, maybe more
192 * 0f 05 - syscall
193 * 0f 06 - clts (CPL0 insn)
194 * 0f 07 - sysret
195 * 0f 08 - invd (CPL0 insn)
196 * 0f 09 - wbinvd (CPL0 insn)
197 * 0f 0b - ud2
198 * 0f 30 - wrmsr (CPL0 insn) (then why rdmsr is allowed, it's also CPL0 insn?)
199 * 0f 34 - sysenter
200 * 0f 35 - sysexit
201 * 0f 37 - getsec
202 * 0f 78 - vmread (Intel VMX. CPL0 insn)
203 * 0f 79 - vmwrite (Intel VMX. CPL0 insn)
204 * Note: with prefixes, these two opcodes are
205 * extrq/insertq/AVX512 convert vector ops.
206 * 0f ae - group15: [f]xsave,[f]xrstor,[v]{ld,st}mxcsr,clflush[opt],
207 * {rd,wr}{fs,gs}base,{s,l,m}fence.
208 * Why? They are all user-executable.
209 */
126static volatile u32 good_2byte_insns[256 / 32] = { 210static volatile u32 good_2byte_insns[256 / 32] = {
127 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 211 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
128 /* ---------------------------------------------- */ 212 /* ---------------------------------------------- */
129 W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ 213 W(0x00, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1) | /* 00 */
130 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ 214 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
131 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ 215 W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
132 W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ 216 W(0x30, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */
133 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ 217 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
134 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ 218 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
135 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ 219 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
136 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ 220 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* 70 */
137 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ 221 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
138 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ 222 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
139 W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ 223 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
140 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ 224 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
141 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ 225 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
142 W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ 226 W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
143 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ 227 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
144 W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ 228 W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) /* f0 */
145 /* ---------------------------------------------- */ 229 /* ---------------------------------------------- */
146 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 230 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
147}; 231};
148#undef W 232#undef W
149 233
150/* 234/*
151 * opcodes we'll probably never support:
152 *
153 * 6c-6d, e4-e5, ec-ed - in
154 * 6e-6f, e6-e7, ee-ef - out
155 * cc, cd - int3, int
156 * cf - iret
157 * d6 - illegal instruction
158 * f1 - int1/icebp
159 * f4 - hlt
160 * fa, fb - cli, sti
161 * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
162 *
163 * invalid opcodes in 64-bit mode:
164 *
165 * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
166 * 63 - we support this opcode in x86_64 but not in i386.
167 *
168 * opcodes we may need to refine support for: 235 * opcodes we may need to refine support for:
169 * 236 *
170 * 0f - 2-byte instructions: For many of these instructions, the validity 237 * 0f - 2-byte instructions: For many of these instructions, the validity
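
For context on the tables above: each W() row packs sixteen 0/1 flags into 16 bits, two rows share one u32 via the << (row % 32) shift, and each table therefore forms a 256-bit opcode bitmap. A sketch of how a consumer probes one opcode byte (the helper name is illustrative; the file's real callers do effectively this with test_bit()):

/* Sketch: treat the table's four u32 words as one 256-bit bitmap. */
static bool opcode_is_good(u8 opcode, volatile u32 *table)
{
	return test_bit(opcode, (unsigned long *)table);
}
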
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 553c094b9cd7..a110efca6d06 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -238,6 +238,31 @@ static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
238 } 238 }
239} 239}
240 240
241static const char *page_size_string(struct map_range *mr)
242{
243 static const char str_1g[] = "1G";
244 static const char str_2m[] = "2M";
245 static const char str_4m[] = "4M";
246 static const char str_4k[] = "4k";
247
248 if (mr->page_size_mask & (1<<PG_LEVEL_1G))
249 return str_1g;
250 /*
251 * 32-bit without PAE has a 4M large page size.
252 * PG_LEVEL_2M is misnamed, but we can at least
253 * print out the right size in the string.
254 */
255 if (IS_ENABLED(CONFIG_X86_32) &&
256 !IS_ENABLED(CONFIG_X86_PAE) &&
257 mr->page_size_mask & (1<<PG_LEVEL_2M))
258 return str_4m;
259
260 if (mr->page_size_mask & (1<<PG_LEVEL_2M))
261 return str_2m;
262
263 return str_4k;
264}
265
241static int __meminit split_mem_range(struct map_range *mr, int nr_range, 266static int __meminit split_mem_range(struct map_range *mr, int nr_range,
242 unsigned long start, 267 unsigned long start,
243 unsigned long end) 268 unsigned long end)
@@ -333,8 +358,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range,
333 for (i = 0; i < nr_range; i++) 358 for (i = 0; i < nr_range; i++)
334 printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", 359 printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
335 mr[i].start, mr[i].end - 1, 360 mr[i].start, mr[i].end - 1,
336 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( 361 page_size_string(&mr[i]));
337 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
338 362
339 return nr_range; 363 return nr_range;
340} 364}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 919b91205cd4..df4552bd239e 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -35,12 +35,12 @@ struct va_alignment __read_mostly va_align = {
35 .flags = -1, 35 .flags = -1,
36}; 36};
37 37
38static unsigned int stack_maxrandom_size(void) 38static unsigned long stack_maxrandom_size(void)
39{ 39{
40 unsigned int max = 0; 40 unsigned long max = 0;
41 if ((current->flags & PF_RANDOMIZE) && 41 if ((current->flags & PF_RANDOMIZE) &&
42 !(current->personality & ADDR_NO_RANDOMIZE)) { 42 !(current->personality & ADDR_NO_RANDOMIZE)) {
43 max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT; 43 max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;
44 } 44 }
45 45
46 return max; 46 return max;
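
The int-to-long widening above matters because the shifted mask no longer fits in 32 bits on 64-bit kernels. Assuming x86_64's usual STACK_RND_MASK of 0x3fffff and a 4 KiB PAGE_SHIFT (illustrative constants; both are per-arch in reality), a standalone demonstration of the truncation the patch fixes:

#include <stdio.h>

#define STACK_RND_MASK 0x3fffff /* assumption: x86_64 value for 64-bit tasks */
#define PAGE_SHIFT 12

int main(void)
{
	/* Old arithmetic: 0x3fffff << 12 = 0x3fffff000 wraps in 32 bits. */
	unsigned int old = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT;
	/* Fixed arithmetic keeps the full 34-bit result. */
	unsigned long fixed = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;

	printf("old=%#x fixed=%#lx\n", old, fixed);
	return 0;
}
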
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 85afde1fa3e5..a62e0be3a2f1 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -5,6 +5,7 @@ obj-y += geode/
5obj-y += goldfish/ 5obj-y += goldfish/
6obj-y += iris/ 6obj-y += iris/
7obj-y += intel-mid/ 7obj-y += intel-mid/
8obj-y += intel-quark/
8obj-y += olpc/ 9obj-y += olpc/
9obj-y += scx200/ 10obj-y += scx200/
10obj-y += sfi/ 11obj-y += sfi/
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 5fcda7272550..86d0f9e08dd9 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -91,167 +91,6 @@ ENTRY(efi_call)
91 ret 91 ret
92ENDPROC(efi_call) 92ENDPROC(efi_call)
93 93
94#ifdef CONFIG_EFI_MIXED
95
96/*
97 * We run this function from the 1:1 mapping.
98 *
99 * This function must be invoked with a 1:1 mapped stack.
100 */
101ENTRY(__efi64_thunk)
102 movl %ds, %eax
103 push %rax
104 movl %es, %eax
105 push %rax
106 movl %ss, %eax
107 push %rax
108
109 subq $32, %rsp
110 movl %esi, 0x0(%rsp)
111 movl %edx, 0x4(%rsp)
112 movl %ecx, 0x8(%rsp)
113 movq %r8, %rsi
114 movl %esi, 0xc(%rsp)
115 movq %r9, %rsi
116 movl %esi, 0x10(%rsp)
117
118 sgdt save_gdt(%rip)
119
120 leaq 1f(%rip), %rbx
121 movq %rbx, func_rt_ptr(%rip)
122
123 /* Switch to gdt with 32-bit segments */
124 movl 64(%rsp), %eax
125 lgdt (%rax)
126
127 leaq efi_enter32(%rip), %rax
128 pushq $__KERNEL_CS
129 pushq %rax
130 lretq
131
1321: addq $32, %rsp
133
134 lgdt save_gdt(%rip)
135
136 pop %rbx
137 movl %ebx, %ss
138 pop %rbx
139 movl %ebx, %es
140 pop %rbx
141 movl %ebx, %ds
142
143 /*
144 * Convert 32-bit status code into 64-bit.
145 */
146 test %rax, %rax
147 jz 1f
148 movl %eax, %ecx
149 andl $0x0fffffff, %ecx
150 andl $0xf0000000, %eax
151 shl $32, %rax
152 or %rcx, %rax
1531:
154 ret
155ENDPROC(__efi64_thunk)
156
157ENTRY(efi_exit32)
158 movq func_rt_ptr(%rip), %rax
159 push %rax
160 mov %rdi, %rax
161 ret
162ENDPROC(efi_exit32)
163
164 .code32
165/*
166 * EFI service pointer must be in %edi.
167 *
168 * The stack should represent the 32-bit calling convention.
169 */
170ENTRY(efi_enter32)
171 movl $__KERNEL_DS, %eax
172 movl %eax, %ds
173 movl %eax, %es
174 movl %eax, %ss
175
176 /* Reload pgtables */
177 movl %cr3, %eax
178 movl %eax, %cr3
179
180 /* Disable paging */
181 movl %cr0, %eax
182 btrl $X86_CR0_PG_BIT, %eax
183 movl %eax, %cr0
184
185 /* Disable long mode via EFER */
186 movl $MSR_EFER, %ecx
187 rdmsr
188 btrl $_EFER_LME, %eax
189 wrmsr
190
191 call *%edi
192
193 /* We must preserve return value */
194 movl %eax, %edi
195
196 /*
197 * Some firmware will return with interrupts enabled. Be sure to
198 * disable them before we switch GDTs.
199 */
200 cli
201
202 movl 68(%esp), %eax
203 movl %eax, 2(%eax)
204 lgdtl (%eax)
205
206 movl %cr4, %eax
207 btsl $(X86_CR4_PAE_BIT), %eax
208 movl %eax, %cr4
209
210 movl %cr3, %eax
211 movl %eax, %cr3
212
213 movl $MSR_EFER, %ecx
214 rdmsr
215 btsl $_EFER_LME, %eax
216 wrmsr
217
218 xorl %eax, %eax
219 lldt %ax
220
221 movl 72(%esp), %eax
222 pushl $__KERNEL_CS
223 pushl %eax
224
225 /* Enable paging */
226 movl %cr0, %eax
227 btsl $X86_CR0_PG_BIT, %eax
228 movl %eax, %cr0
229 lret
230ENDPROC(efi_enter32)
231
232 .data
233 .balign 8
234 .global efi32_boot_gdt
235efi32_boot_gdt: .word 0
236 .quad 0
237
238save_gdt: .word 0
239 .quad 0
240func_rt_ptr: .quad 0
241
242 .global efi_gdt64
243efi_gdt64:
244 .word efi_gdt64_end - efi_gdt64
245 .long 0 /* Filled out by user */
246 .word 0
247 .quad 0x0000000000000000 /* NULL descriptor */
248 .quad 0x00af9a000000ffff /* __KERNEL_CS */
249 .quad 0x00cf92000000ffff /* __KERNEL_DS */
250 .quad 0x0080890000000000 /* TS descriptor */
251 .quad 0x0000000000000000 /* TS continued */
252efi_gdt64_end:
253#endif /* CONFIG_EFI_MIXED */
254
255 .data 94 .data
256ENTRY(efi_scratch) 95ENTRY(efi_scratch)
257 .fill 3,8,0 96 .fill 3,8,0
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
index 8806fa73e6e6..ff85d28c50f2 100644
--- a/arch/x86/platform/efi/efi_thunk_64.S
+++ b/arch/x86/platform/efi/efi_thunk_64.S
@@ -1,9 +1,26 @@
1/* 1/*
2 * Copyright (C) 2014 Intel Corporation; author Matt Fleming 2 * Copyright (C) 2014 Intel Corporation; author Matt Fleming
3 *
4 * Support for invoking 32-bit EFI runtime services from a 64-bit
5 * kernel.
6 *
7 * The below thunking functions are only used after ExitBootServices()
8 * has been called. This simplifies things considerably as compared with
9 * the early EFI thunking because we can leave all the kernel state
 10 * intact (GDT, IDT, etc) and simply invoke the 32-bit EFI runtime
11 * services from __KERNEL32_CS. This means we can continue to service
12 * interrupts across an EFI mixed mode call.
13 *
 14 * We do, however, need to handle the fact that we're running in a full
15 * 64-bit virtual address space. Things like the stack and instruction
16 * addresses need to be accessible by the 32-bit firmware, so we rely on
17 * using the identity mappings in the EFI page table to access the stack
18 * and kernel text (see efi_setup_page_tables()).
3 */ 19 */
4 20
5#include <linux/linkage.h> 21#include <linux/linkage.h>
6#include <asm/page_types.h> 22#include <asm/page_types.h>
23#include <asm/segment.h>
7 24
8 .text 25 .text
9 .code64 26 .code64
@@ -33,14 +50,6 @@ ENTRY(efi64_thunk)
33 leaq efi_exit32(%rip), %rbx 50 leaq efi_exit32(%rip), %rbx
34 subq %rax, %rbx 51 subq %rax, %rbx
35 movl %ebx, 8(%rsp) 52 movl %ebx, 8(%rsp)
36 leaq efi_gdt64(%rip), %rbx
37 subq %rax, %rbx
38 movl %ebx, 2(%ebx)
39 movl %ebx, 4(%rsp)
40 leaq efi_gdt32(%rip), %rbx
41 subq %rax, %rbx
42 movl %ebx, 2(%ebx)
43 movl %ebx, (%rsp)
44 53
45 leaq __efi64_thunk(%rip), %rbx 54 leaq __efi64_thunk(%rip), %rbx
46 subq %rax, %rbx 55 subq %rax, %rbx
@@ -52,14 +61,92 @@ ENTRY(efi64_thunk)
52 retq 61 retq
53ENDPROC(efi64_thunk) 62ENDPROC(efi64_thunk)
54 63
55 .data 64/*
56efi_gdt32: 65 * We run this function from the 1:1 mapping.
57 .word efi_gdt32_end - efi_gdt32 66 *
58 .long 0 /* Filled out above */ 67 * This function must be invoked with a 1:1 mapped stack.
59 .word 0 68 */
60 .quad 0x0000000000000000 /* NULL descriptor */ 69ENTRY(__efi64_thunk)
61 .quad 0x00cf9a000000ffff /* __KERNEL_CS */ 70 movl %ds, %eax
62 .quad 0x00cf93000000ffff /* __KERNEL_DS */ 71 push %rax
63efi_gdt32_end: 72 movl %es, %eax
73 push %rax
74 movl %ss, %eax
75 push %rax
76
77 subq $32, %rsp
78 movl %esi, 0x0(%rsp)
79 movl %edx, 0x4(%rsp)
80 movl %ecx, 0x8(%rsp)
81 movq %r8, %rsi
82 movl %esi, 0xc(%rsp)
83 movq %r9, %rsi
84 movl %esi, 0x10(%rsp)
85
86 leaq 1f(%rip), %rbx
87 movq %rbx, func_rt_ptr(%rip)
88
89 /* Switch to 32-bit descriptor */
90 pushq $__KERNEL32_CS
91 leaq efi_enter32(%rip), %rax
92 pushq %rax
93 lretq
94
951: addq $32, %rsp
96
97 pop %rbx
98 movl %ebx, %ss
99 pop %rbx
100 movl %ebx, %es
101 pop %rbx
102 movl %ebx, %ds
64 103
104 /*
105 * Convert 32-bit status code into 64-bit.
106 */
107 test %rax, %rax
108 jz 1f
109 movl %eax, %ecx
110 andl $0x0fffffff, %ecx
111 andl $0xf0000000, %eax
112 shl $32, %rax
113 or %rcx, %rax
1141:
115 ret
116ENDPROC(__efi64_thunk)
117
118ENTRY(efi_exit32)
119 movq func_rt_ptr(%rip), %rax
120 push %rax
121 mov %rdi, %rax
122 ret
123ENDPROC(efi_exit32)
124
125 .code32
126/*
127 * EFI service pointer must be in %edi.
128 *
129 * The stack should represent the 32-bit calling convention.
130 */
131ENTRY(efi_enter32)
132 movl $__KERNEL_DS, %eax
133 movl %eax, %ds
134 movl %eax, %es
135 movl %eax, %ss
136
137 call *%edi
138
139 /* We must preserve return value */
140 movl %eax, %edi
141
142 movl 72(%esp), %eax
143 pushl $__KERNEL_CS
144 pushl %eax
145
146 lret
147ENDPROC(efi_enter32)
148
149 .data
150 .balign 8
151func_rt_ptr: .quad 0
65efi_saved_sp: .quad 0 152efi_saved_sp: .quad 0
diff --git a/arch/x86/platform/intel-quark/Makefile b/arch/x86/platform/intel-quark/Makefile
new file mode 100644
index 000000000000..9cc57ed36022
--- /dev/null
+++ b/arch/x86/platform/intel-quark/Makefile
@@ -0,0 +1,2 @@
1obj-$(CONFIG_INTEL_IMR) += imr.o
2obj-$(CONFIG_DEBUG_IMR_SELFTEST) += imr_selftest.o
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
new file mode 100644
index 000000000000..0ee619f9fcb7
--- /dev/null
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -0,0 +1,661 @@
1/**
2 * imr.c
3 *
4 * Copyright(c) 2013 Intel Corporation.
5 * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
6 *
7 * IMR registers define an isolated region of memory that can
8 * be masked to prohibit certain system agents from accessing memory.
 9 * When a device behind a masked port performs an access - snooped or
 10 * not - an IMR may optionally prevent that transaction from changing
11 * the state of memory or from getting correct data in response to the
12 * operation.
13 *
 14 * Write data will be dropped and reads will return 0xFFFFFFFF; the
 15 * system will reset and the system BIOS will print out an error message
 16 * to inform the user that an IMR has been violated.
17 *
18 * This code is based on the Linux MTRR code and reference code from
19 * Intel's Quark BSP EFI, Linux and grub code.
20 *
21 * See quark-x1000-datasheet.pdf for register definitions.
22 * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/quark-x1000-datasheet.pdf
23 */
24
25#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
26
27#include <asm-generic/sections.h>
28#include <asm/cpu_device_id.h>
29#include <asm/imr.h>
30#include <asm/iosf_mbi.h>
31#include <linux/debugfs.h>
32#include <linux/init.h>
33#include <linux/mm.h>
34#include <linux/module.h>
35#include <linux/types.h>
36
37struct imr_device {
38 struct dentry *file;
39 bool init;
40 struct mutex lock;
41 int max_imr;
42 int reg_base;
43};
44
45static struct imr_device imr_dev;
46
47/*
48 * IMR read/write mask control registers.
49 * See quark-x1000-datasheet.pdf sections 12.7.4.5 and 12.7.4.6 for
50 * bit definitions.
51 *
 52 * addr_lo
53 * 31 Lock bit
54 * 30:24 Reserved
55 * 23:2 1 KiB aligned lo address
56 * 1:0 Reserved
57 *
58 * addr_hi
59 * 31:24 Reserved
60 * 23:2 1 KiB aligned hi address
61 * 1:0 Reserved
62 */
63#define IMR_LOCK BIT(31)
64
65struct imr_regs {
66 u32 addr_lo;
67 u32 addr_hi;
68 u32 rmask;
69 u32 wmask;
70};
71
72#define IMR_NUM_REGS (sizeof(struct imr_regs)/sizeof(u32))
73#define IMR_SHIFT 8
74#define imr_to_phys(x) ((x) << IMR_SHIFT)
75#define phys_to_imr(x) ((x) >> IMR_SHIFT)
76
77/**
 78 * imr_is_enabled - true if an IMR is enabled, false otherwise.
79 *
80 * Determines if an IMR is enabled based on address range and read/write
81 * mask. An IMR set with an address range set to zero and a read/write
 82 * access mask set to all is considered to be disabled. An IMR in any
 83 * other state - for example, with its range set to zero but without
 84 * read/write access set to all - is considered to be enabled. This
 85 * definition of disabled is how firmware switches off an IMR and is
 86 * maintained in the kernel for consistency.
87 *
88 * @imr: pointer to IMR descriptor.
89 * @return: true if IMR enabled false if disabled.
90 */
91static inline int imr_is_enabled(struct imr_regs *imr)
92{
93 return !(imr->rmask == IMR_READ_ACCESS_ALL &&
94 imr->wmask == IMR_WRITE_ACCESS_ALL &&
95 imr_to_phys(imr->addr_lo) == 0 &&
96 imr_to_phys(imr->addr_hi) == 0);
97}
98
99/**
100 * imr_read - read an IMR at a given index.
101 *
102 * Requires caller to hold imr mutex.
103 *
104 * @idev: pointer to imr_device structure.
105 * @imr_id: IMR entry to read.
106 * @imr: IMR structure representing address and access masks.
 107 * @return: 0 on success or error code passed from iosf_mbi on failure.
108 */
109static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
110{
111 u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
112 int ret;
113
114 ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
115 reg++, &imr->addr_lo);
116 if (ret)
117 return ret;
118
119 ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
120 reg++, &imr->addr_hi);
121 if (ret)
122 return ret;
123
124 ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
125 reg++, &imr->rmask);
126 if (ret)
127 return ret;
128
129 return iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
130 reg++, &imr->wmask);
131}
132
133/**
134 * imr_write - write an IMR at a given index.
135 *
136 * Requires caller to hold imr mutex.
137 * Note lock bits need to be written independently of address bits.
138 *
139 * @idev: pointer to imr_device structure.
140 * @imr_id: IMR entry to write.
141 * @imr: IMR structure representing address and access masks.
142 * @lock: indicates if the IMR lock bit should be applied.
 143 * @return: 0 on success or error code passed from iosf_mbi on failure.
144 */
145static int imr_write(struct imr_device *idev, u32 imr_id,
146 struct imr_regs *imr, bool lock)
147{
148 unsigned long flags;
149 u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
150 int ret;
151
152 local_irq_save(flags);
153
154 ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, reg++,
155 imr->addr_lo);
156 if (ret)
157 goto failed;
158
159 ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
160 reg++, imr->addr_hi);
161 if (ret)
162 goto failed;
163
164 ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
165 reg++, imr->rmask);
166 if (ret)
167 goto failed;
168
169 ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
170 reg++, imr->wmask);
171 if (ret)
172 goto failed;
173
 174 /* Lock bit must be set separately from the addr_lo address bits. */
175 if (lock) {
176 imr->addr_lo |= IMR_LOCK;
177 ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
178 reg - IMR_NUM_REGS, imr->addr_lo);
179 if (ret)
180 goto failed;
181 }
182
183 local_irq_restore(flags);
184 return 0;
185failed:
186 /*
187 * If writing to the IOSF failed then we're in an unknown state,
188 * likely a very bad state. An IMR in an invalid state will almost
189 * certainly lead to a memory access violation.
190 */
191 local_irq_restore(flags);
192 WARN(ret, "IOSF-MBI write fail range 0x%08x-0x%08x unreliable\n",
193 imr_to_phys(imr->addr_lo), imr_to_phys(imr->addr_hi) + IMR_MASK);
194
195 return ret;
196}
197
198/**
199 * imr_dbgfs_state_show - print state of IMR registers.
200 *
201 * @s: pointer to seq_file for output.
202 * @unused: unused parameter.
 203 * @return: 0 on success or error code passed from iosf_mbi on failure.
204 */
205static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
206{
207 phys_addr_t base;
208 phys_addr_t end;
209 int i;
210 struct imr_device *idev = s->private;
211 struct imr_regs imr;
212 size_t size;
213 int ret = -ENODEV;
214
215 mutex_lock(&idev->lock);
216
217 for (i = 0; i < idev->max_imr; i++) {
218
219 ret = imr_read(idev, i, &imr);
220 if (ret)
221 break;
222
223 /*
 224 * Remember to add IMR_ALIGN bytes to size to account for the
 225 * inherent IMR_ALIGN size bytes contained in the masked-away
 226 * lower ten bits.
227 */
228 if (imr_is_enabled(&imr)) {
229 base = imr_to_phys(imr.addr_lo);
230 end = imr_to_phys(imr.addr_hi) + IMR_MASK;
231 } else {
232 base = 0;
233 end = 0;
234 }
235 size = end - base;
236 seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
237 "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
238 &base, &end, size, imr.rmask, imr.wmask,
239 imr_is_enabled(&imr) ? "enabled " : "disabled",
240 imr.addr_lo & IMR_LOCK ? "locked" : "unlocked");
241 }
242
243 mutex_unlock(&idev->lock);
244 return ret;
245}
246
247/**
248 * imr_state_open - debugfs open callback.
249 *
250 * @inode: pointer to struct inode.
251 * @file: pointer to struct file.
252 * @return: result of single open.
253 */
254static int imr_state_open(struct inode *inode, struct file *file)
255{
256 return single_open(file, imr_dbgfs_state_show, inode->i_private);
257}
258
259static const struct file_operations imr_state_ops = {
260 .open = imr_state_open,
261 .read = seq_read,
262 .llseek = seq_lseek,
263 .release = single_release,
264};
265
266/**
267 * imr_debugfs_register - register debugfs hooks.
268 *
269 * @idev: pointer to imr_device structure.
270 * @return: 0 on success - errno on failure.
271 */
272static int imr_debugfs_register(struct imr_device *idev)
273{
274 idev->file = debugfs_create_file("imr_state", S_IFREG | S_IRUGO, NULL,
275 idev, &imr_state_ops);
276 return PTR_ERR_OR_ZERO(idev->file);
277}
278
279/**
280 * imr_debugfs_unregister - unregister debugfs hooks.
281 *
282 * @idev: pointer to imr_device structure.
283 * @return:
284 */
285static void imr_debugfs_unregister(struct imr_device *idev)
286{
287 debugfs_remove(idev->file);
288}
289
290/**
 291 * imr_check_params - check that an address range is IMR-aligned and of non-zero size
 292 *
 293 * @base: base address of intended IMR.
 294 * @size: size of intended IMR.
 295 * @return: zero on valid range, -EINVAL on unaligned base/size or zero size.
296 */
297static int imr_check_params(phys_addr_t base, size_t size)
298{
299 if ((base & IMR_MASK) || (size & IMR_MASK)) {
300 pr_err("base %pa size 0x%08zx must align to 1KiB\n",
301 &base, size);
302 return -EINVAL;
303 }
304 if (size == 0)
305 return -EINVAL;
306
307 return 0;
308}
309
310/**
311 * imr_raw_size - account for the IMR_ALIGN bytes that addr_hi appends.
312 *
 313 * IMR addr_hi has a built-in offset of IMR_ALIGN (0x400) bytes beyond the
314 * value in the register. We need to subtract IMR_ALIGN bytes from input sizes
315 * as a result.
316 *
317 * @size: input size bytes.
318 * @return: reduced size.
319 */
320static inline size_t imr_raw_size(size_t size)
321{
322 return size - IMR_ALIGN;
323}
324
325/**
326 * imr_address_overlap - detects an address overlap.
327 *
328 * @addr: address to check against an existing IMR.
329 * @imr: imr being checked.
330 * @return: true for overlap false for no overlap.
331 */
332static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr)
333{
334 return addr >= imr_to_phys(imr->addr_lo) && addr <= imr_to_phys(imr->addr_hi);
335}
336
337/**
338 * imr_add_range - add an Isolated Memory Region.
339 *
 340 * @base: physical base address of region, aligned to 1 KiB.
 341 * @size: physical size of region in bytes; must be aligned to 1 KiB.
 342 * @rmask: read access mask.
 343 * @wmask: write access mask.
344 * @lock: indicates whether or not to permanently lock this region.
345 * @return: zero on success or negative value indicating error.
346 */
347int imr_add_range(phys_addr_t base, size_t size,
348 unsigned int rmask, unsigned int wmask, bool lock)
349{
350 phys_addr_t end;
351 unsigned int i;
352 struct imr_device *idev = &imr_dev;
353 struct imr_regs imr;
354 size_t raw_size;
355 int reg;
356 int ret;
357
358 if (WARN_ONCE(idev->init == false, "driver not initialized"))
359 return -ENODEV;
360
361 ret = imr_check_params(base, size);
362 if (ret)
363 return ret;
364
365 /* Tweak the size value. */
366 raw_size = imr_raw_size(size);
367 end = base + raw_size;
368
369 /*
370 * Check for reserved IMR value common to firmware, kernel and grub
371 * indicating a disabled IMR.
372 */
373 imr.addr_lo = phys_to_imr(base);
374 imr.addr_hi = phys_to_imr(end);
375 imr.rmask = rmask;
376 imr.wmask = wmask;
377 if (!imr_is_enabled(&imr))
378 return -ENOTSUPP;
379
380 mutex_lock(&idev->lock);
381
382 /*
383 * Find a free IMR while checking for an existing overlapping range.
384 * Note there's no restriction in silicon to prevent IMR overlaps.
385 * For the sake of simplicity and ease in defining/debugging an IMR
386 * memory map we exclude IMR overlaps.
387 */
388 reg = -1;
389 for (i = 0; i < idev->max_imr; i++) {
390 ret = imr_read(idev, i, &imr);
391 if (ret)
392 goto failed;
393
394 /* Find overlap @ base or end of requested range. */
395 ret = -EINVAL;
396 if (imr_is_enabled(&imr)) {
397 if (imr_address_overlap(base, &imr))
398 goto failed;
399 if (imr_address_overlap(end, &imr))
400 goto failed;
401 } else {
402 reg = i;
403 }
404 }
405
406 /* Error out if we have no free IMR entries. */
407 if (reg == -1) {
408 ret = -ENOMEM;
409 goto failed;
410 }
411
 412 pr_debug("add %d phys %pa-%pa size %zx rmask 0x%08x wmask 0x%08x\n",
413 reg, &base, &end, raw_size, rmask, wmask);
414
415 /* Enable IMR at specified range and access mask. */
416 imr.addr_lo = phys_to_imr(base);
417 imr.addr_hi = phys_to_imr(end);
418 imr.rmask = rmask;
419 imr.wmask = wmask;
420
421 ret = imr_write(idev, reg, &imr, lock);
422 if (ret < 0) {
423 /*
424 * In the highly unlikely event iosf_mbi_write failed
425 * attempt to rollback the IMR setup skipping the trapping
426 * of further IOSF write failures.
427 */
428 imr.addr_lo = 0;
429 imr.addr_hi = 0;
430 imr.rmask = IMR_READ_ACCESS_ALL;
431 imr.wmask = IMR_WRITE_ACCESS_ALL;
432 imr_write(idev, reg, &imr, false);
433 }
434failed:
435 mutex_unlock(&idev->lock);
436 return ret;
437}
438EXPORT_SYMBOL_GPL(imr_add_range);
439
440/**
441 * __imr_remove_range - delete an Isolated Memory Region.
442 *
443 * This function allows you to delete an IMR by its index specified by reg or
444 * by address range specified by base and size respectively. If you specify an
445 * index on its own the base and size parameters are ignored.
446 * imr_remove_range(0, base, size); delete IMR at index 0 base/size ignored.
447 * imr_remove_range(-1, base, size); delete IMR from base to base+size.
448 *
449 * @reg: imr index to remove.
450 * @base: physical base address of region aligned to 1 KiB.
451 * @size: physical size of region in bytes aligned to 1 KiB.
 452 * @return: -EINVAL on invalid range or out of range id
453 * -ENODEV if reg is valid but no IMR exists or is locked
454 * 0 on success.
455 */
456static int __imr_remove_range(int reg, phys_addr_t base, size_t size)
457{
458 phys_addr_t end;
459 bool found = false;
460 unsigned int i;
461 struct imr_device *idev = &imr_dev;
462 struct imr_regs imr;
463 size_t raw_size;
464 int ret = 0;
465
466 if (WARN_ONCE(idev->init == false, "driver not initialized"))
467 return -ENODEV;
468
469 /*
470 * Validate address range if deleting by address, else we are
471 * deleting by index where base and size will be ignored.
472 */
473 if (reg == -1) {
474 ret = imr_check_params(base, size);
475 if (ret)
476 return ret;
477 }
478
479 /* Tweak the size value. */
480 raw_size = imr_raw_size(size);
481 end = base + raw_size;
482
483 mutex_lock(&idev->lock);
484
485 if (reg >= 0) {
486 /* If a specific IMR is given try to use it. */
487 ret = imr_read(idev, reg, &imr);
488 if (ret)
489 goto failed;
490
491 if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK) {
492 ret = -ENODEV;
493 goto failed;
494 }
495 found = true;
496 } else {
497 /* Search for match based on address range. */
498 for (i = 0; i < idev->max_imr; i++) {
499 ret = imr_read(idev, i, &imr);
500 if (ret)
501 goto failed;
502
503 if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK)
504 continue;
505
506 if ((imr_to_phys(imr.addr_lo) == base) &&
507 (imr_to_phys(imr.addr_hi) == end)) {
508 found = true;
509 reg = i;
510 break;
511 }
512 }
513 }
514
515 if (!found) {
516 ret = -ENODEV;
517 goto failed;
518 }
519
520 pr_debug("remove %d phys %pa-%pa size %zx\n", reg, &base, &end, raw_size);
521
522 /* Tear down the IMR. */
523 imr.addr_lo = 0;
524 imr.addr_hi = 0;
525 imr.rmask = IMR_READ_ACCESS_ALL;
526 imr.wmask = IMR_WRITE_ACCESS_ALL;
527
528 ret = imr_write(idev, reg, &imr, false);
529
530failed:
531 mutex_unlock(&idev->lock);
532 return ret;
533}
534
535/**
536 * imr_remove_range - delete an Isolated Memory Region by address
537 *
538 * This function allows you to delete an IMR by an address range specified
539 * by base and size respectively.
540 * imr_remove_range(base, size); delete IMR from base to base+size.
541 *
542 * @base: physical base address of region aligned to 1 KiB.
543 * @size: physical size of region in bytes aligned to 1 KiB.
 544 * @return: -EINVAL on invalid range or out of range id
545 * -ENODEV if reg is valid but no IMR exists or is locked
546 * 0 on success.
547 */
548int imr_remove_range(phys_addr_t base, size_t size)
549{
550 return __imr_remove_range(-1, base, size);
551}
552EXPORT_SYMBOL_GPL(imr_remove_range);
553
554/**
555 * imr_clear - delete an Isolated Memory Region by index
556 *
 557 * This function allows you to delete an IMR by its index rather than
 558 * by address range. Useful for initial sanitization of the IMR
 559 * address map.
 560 * imr_clear(reg); delete IMR at index reg.
561 *
562 * @reg: imr index to remove.
 563 * @return: -EINVAL on invalid range or out of range id
564 * -ENODEV if reg is valid but no IMR exists or is locked
565 * 0 on success.
566 */
567static inline int imr_clear(int reg)
568{
569 return __imr_remove_range(reg, 0, 0);
570}
571
572/**
573 * imr_fixup_memmap - Tear down IMRs used during bootup.
574 *
 575 * BIOS and Grub both set up IMRs around the compressed kernel and
 576 * initrd memory; these need to be removed before the kernel hands out
 577 * one of the IMR-encased addresses to a downstream DMA agent such as
 578 * the SD or Ethernet controller. IMRs on Galileo are set up to reset
 579 * the system immediately on violation. As a result, if you're running
 580 * a root filesystem from SD, you'll need the boot-time IMRs torn down
 581 * or you'll find seemingly random resets when using your filesystem.
582 *
583 * @idev: pointer to imr_device structure.
584 * @return:
585 */
586static void __init imr_fixup_memmap(struct imr_device *idev)
587{
588 phys_addr_t base = virt_to_phys(&_text);
589 size_t size = virt_to_phys(&__end_rodata) - base;
590 int i;
591 int ret;
592
593 /* Tear down all existing unlocked IMRs. */
594 for (i = 0; i < idev->max_imr; i++)
595 imr_clear(i);
596
597 /*
598 * Setup a locked IMR around the physical extent of the kernel
 599 * from the beginning of the .text section to the end of the
600 * .rodata section as one physically contiguous block.
601 */
602 ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
603 if (ret < 0) {
604 pr_err("unable to setup IMR for kernel: (%p - %p)\n",
605 &_text, &__end_rodata);
606 } else {
607 pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
608 size / 1024, &_text, &__end_rodata);
609 }
610
611}
612
613static const struct x86_cpu_id imr_ids[] __initconst = {
614 { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */
615 {}
616};
617MODULE_DEVICE_TABLE(x86cpu, imr_ids);
618
619/**
620 * imr_init - entry point for IMR driver.
621 *
 622 * return: -ENODEV for no IMR support, 0 if good to go.
623 */
624static int __init imr_init(void)
625{
626 struct imr_device *idev = &imr_dev;
627 int ret;
628
629 if (!x86_match_cpu(imr_ids) || !iosf_mbi_available())
630 return -ENODEV;
631
632 idev->max_imr = QUARK_X1000_IMR_MAX;
633 idev->reg_base = QUARK_X1000_IMR_REGBASE;
634 idev->init = true;
635
636 mutex_init(&idev->lock);
637 ret = imr_debugfs_register(idev);
638 if (ret != 0)
639 pr_warn("debugfs register failed!\n");
640 imr_fixup_memmap(idev);
641 return 0;
642}
643
644/**
645 * imr_exit - exit point for IMR code.
646 *
 647 * Deregisters debugfs, leaves IMR state as-is.
648 *
649 * return:
650 */
651static void __exit imr_exit(void)
652{
653 imr_debugfs_unregister(&imr_dev);
654}
655
656module_init(imr_init);
657module_exit(imr_exit);
658
659MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
660MODULE_DESCRIPTION("Intel Isolated Memory Region driver");
661MODULE_LICENSE("Dual BSD/GPL");
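
Taken together, the exported surface of the new driver is just imr_add_range() and imr_remove_range(). A usage sketch for a hypothetical caller that sets up and then tears down an unlocked, CPU-only region (my_base and my_size are placeholders; both must be 1 KiB aligned):

#include <asm/imr.h>
#include <linux/types.h>

static int __init my_imr_example(phys_addr_t my_base, size_t my_size)
{
	int ret;

	/* CPU-only read/write masks; lock=false so it can be removed. */
	ret = imr_add_range(my_base, my_size, IMR_CPU, IMR_CPU, false);
	if (ret)
		return ret;

	/* Tear the same region down again, matching by address range. */
	return imr_remove_range(my_base, my_size);
}
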
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
new file mode 100644
index 000000000000..c9a0838890e2
--- /dev/null
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -0,0 +1,129 @@
1/**
2 * imr_selftest.c
3 *
4 * Copyright(c) 2013 Intel Corporation.
5 * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
6 *
7 * IMR self test. The purpose of this module is to run a set of tests on the
 8 * IMR API to validate its sanity. We check for overlapping, reserved
9 * addresses and setup/teardown sanity.
10 *
11 */
12
13#include <asm-generic/sections.h>
14#include <asm/imr.h>
15#include <linux/init.h>
16#include <linux/mm.h>
17#include <linux/module.h>
18#include <linux/types.h>
19
20#define SELFTEST KBUILD_MODNAME ": "
21/**
22 * imr_self_test_result - Print result string for self test.
23 *
 24 * @res: result code - true if test passed, false otherwise.
25 * @fmt: format string.
26 * ... variadic argument list.
27 */
28static void __init imr_self_test_result(int res, const char *fmt, ...)
29{
30 va_list vlist;
31
32 /* Print pass/fail. */
33 if (res)
34 pr_info(SELFTEST "pass ");
35 else
36 pr_info(SELFTEST "fail ");
37
38 /* Print variable string. */
39 va_start(vlist, fmt);
40 vprintk(fmt, vlist);
41 va_end(vlist);
42
43 /* Optional warning. */
44 WARN(res == 0, "test failed");
45}
46#undef SELFTEST
47
48/**
49 * imr_self_test
50 *
 51 * Run the IMR self test: some simple checks covering overlap,
 52 * zero sized allocations and 1 KiB sized areas.
53 *
54 */
55static void __init imr_self_test(void)
56{
57 phys_addr_t base = virt_to_phys(&_text);
58 size_t size = virt_to_phys(&__end_rodata) - base;
59 const char *fmt_over = "overlapped IMR @ (0x%08lx - 0x%08lx)\n";
60 int ret;
61
62 /* Test zero zero. */
63 ret = imr_add_range(0, 0, 0, 0, false);
64 imr_self_test_result(ret < 0, "zero sized IMR\n");
65
66 /* Test exact overlap. */
67 ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
68 imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
69
70 /* Test overlap with base inside of existing. */
71 base += size - IMR_ALIGN;
72 ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
73 imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
74
75 /* Test overlap with end inside of existing. */
76 base -= size + IMR_ALIGN * 2;
77 ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
78 imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
79
80 /* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */
81 ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL,
82 IMR_WRITE_ACCESS_ALL, false);
83 imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n");
84
85 /* Test that a 1 KiB IMR @ zero with CPU only will work. */
86 ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false);
87 imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n");
88 if (ret >= 0) {
89 ret = imr_remove_range(0, IMR_ALIGN);
90 imr_self_test_result(ret == 0, "teardown - cpu-access\n");
91 }
92
93 /* Test 2 KiB works. */
94 size = IMR_ALIGN * 2;
95 ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL,
96 IMR_WRITE_ACCESS_ALL, false);
97 imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n");
98 if (ret >= 0) {
99 ret = imr_remove_range(0, size);
100 imr_self_test_result(ret == 0, "teardown 2KiB\n");
101 }
102}
103
104/**
 105 * imr_self_test_init - entry point for the IMR self test.
106 *
 107 * return: -ENODEV for no IMR support, 0 if good to go.
108 */
109static int __init imr_self_test_init(void)
110{
111 imr_self_test();
112 return 0;
113}
114
115/**
 116 * imr_self_test_exit - exit point for the IMR self test.
117 *
118 * return:
119 */
120static void __exit imr_self_test_exit(void)
121{
122}
123
124module_init(imr_self_test_init);
125module_exit(imr_self_test_exit);
126
127MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
128MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver");
129MODULE_LICENSE("Dual BSD/GPL");
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 23b45eb9a89c..956374c1edbc 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -41,7 +41,7 @@ static u8 zero_stats;
41static inline void check_zero(void) 41static inline void check_zero(void)
42{ 42{
43 u8 ret; 43 u8 ret;
44 u8 old = ACCESS_ONCE(zero_stats); 44 u8 old = READ_ONCE(zero_stats);
45 if (unlikely(old)) { 45 if (unlikely(old)) {
46 ret = cmpxchg(&zero_stats, old, 0); 46 ret = cmpxchg(&zero_stats, old, 0);
47 /* This ensures only one fellow resets the stat */ 47 /* This ensures only one fellow resets the stat */
@@ -112,6 +112,7 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
112 struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting); 112 struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
113 int cpu = smp_processor_id(); 113 int cpu = smp_processor_id();
114 u64 start; 114 u64 start;
115 __ticket_t head;
115 unsigned long flags; 116 unsigned long flags;
116 117
117 /* If kicker interrupts not initialized yet, just spin */ 118 /* If kicker interrupts not initialized yet, just spin */
@@ -159,11 +160,15 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
159 */ 160 */
160 __ticket_enter_slowpath(lock); 161 __ticket_enter_slowpath(lock);
161 162
163 /* make sure enter_slowpath, which is atomic, does not cross the read */
164 smp_mb__after_atomic();
165
162 /* 166 /*
163 * check again make sure it didn't become free while 167 * check again make sure it didn't become free while
164 * we weren't looking 168 * we weren't looking
165 */ 169 */
166 if (ACCESS_ONCE(lock->tickets.head) == want) { 170 head = READ_ONCE(lock->tickets.head);
171 if (__tickets_equal(head, want)) {
167 add_stats(TAKEN_SLOW_PICKUP, 1); 172 add_stats(TAKEN_SLOW_PICKUP, 1);
168 goto out; 173 goto out;
169 } 174 }
@@ -204,8 +209,8 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
204 const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); 209 const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
205 210
206 /* Make sure we read lock before want */ 211 /* Make sure we read lock before want */
207 if (ACCESS_ONCE(w->lock) == lock && 212 if (READ_ONCE(w->lock) == lock &&
208 ACCESS_ONCE(w->want) == next) { 213 READ_ONCE(w->want) == next) {
209 add_stats(RELEASED_SLOW_KICKED, 1); 214 add_stats(RELEASED_SLOW_KICKED, 1);
210 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); 215 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
211 break; 216 break;
diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c
index ec318bf434a6..1786574536b2 100644
--- a/drivers/char/ipmi/ipmi_devintf.c
+++ b/drivers/char/ipmi/ipmi_devintf.c
@@ -157,12 +157,16 @@ static int ipmi_release(struct inode *inode, struct file *file)
157{ 157{
158 struct ipmi_file_private *priv = file->private_data; 158 struct ipmi_file_private *priv = file->private_data;
159 int rv; 159 int rv;
160 struct ipmi_recv_msg *msg, *next;
160 161
161 rv = ipmi_destroy_user(priv->user); 162 rv = ipmi_destroy_user(priv->user);
162 if (rv) 163 if (rv)
163 return rv; 164 return rv;
164 165
165 /* FIXME - free the messages in the list. */ 166 list_for_each_entry_safe(msg, next, &priv->recv_msgs, link)
167 ipmi_free_recv_msg(msg);
168
169
166 kfree(priv); 170 kfree(priv);
167 171
168 return 0; 172 return 0;
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 6b65fa4e0c55..9bb592872532 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -1483,14 +1483,10 @@ static inline void format_lan_msg(struct ipmi_smi_msg *smi_msg,
1483 smi_msg->msgid = msgid; 1483 smi_msg->msgid = msgid;
1484} 1484}
1485 1485
1486static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers, 1486static struct ipmi_smi_msg *smi_add_send_msg(ipmi_smi_t intf,
1487 struct ipmi_smi_msg *smi_msg, int priority) 1487 struct ipmi_smi_msg *smi_msg,
1488 int priority)
1488{ 1489{
1489 int run_to_completion = intf->run_to_completion;
1490 unsigned long flags;
1491
1492 if (!run_to_completion)
1493 spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
1494 if (intf->curr_msg) { 1490 if (intf->curr_msg) {
1495 if (priority > 0) 1491 if (priority > 0)
1496 list_add_tail(&smi_msg->link, &intf->hp_xmit_msgs); 1492 list_add_tail(&smi_msg->link, &intf->hp_xmit_msgs);
@@ -1500,8 +1496,25 @@ static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers,
1500 } else { 1496 } else {
1501 intf->curr_msg = smi_msg; 1497 intf->curr_msg = smi_msg;
1502 } 1498 }
1503 if (!run_to_completion) 1499
1500 return smi_msg;
1501}
1502
1503
1504static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers,
1505 struct ipmi_smi_msg *smi_msg, int priority)
1506{
1507 int run_to_completion = intf->run_to_completion;
1508
1509 if (run_to_completion) {
1510 smi_msg = smi_add_send_msg(intf, smi_msg, priority);
1511 } else {
1512 unsigned long flags;
1513
1514 spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
1515 smi_msg = smi_add_send_msg(intf, smi_msg, priority);
1504 spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); 1516 spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
1517 }
1505 1518
1506 if (smi_msg) 1519 if (smi_msg)
1507 handlers->sender(intf->send_info, smi_msg); 1520 handlers->sender(intf->send_info, smi_msg);
@@ -1985,7 +1998,9 @@ static int smi_ipmb_proc_show(struct seq_file *m, void *v)
1985 seq_printf(m, "%x", intf->channels[0].address); 1998 seq_printf(m, "%x", intf->channels[0].address);
1986 for (i = 1; i < IPMI_MAX_CHANNELS; i++) 1999 for (i = 1; i < IPMI_MAX_CHANNELS; i++)
1987 seq_printf(m, " %x", intf->channels[i].address); 2000 seq_printf(m, " %x", intf->channels[i].address);
1988 return seq_putc(m, '\n'); 2001 seq_putc(m, '\n');
2002
2003 return seq_has_overflowed(m);
1989} 2004}
1990 2005
1991static int smi_ipmb_proc_open(struct inode *inode, struct file *file) 2006static int smi_ipmb_proc_open(struct inode *inode, struct file *file)
@@ -2004,9 +2019,11 @@ static int smi_version_proc_show(struct seq_file *m, void *v)
2004{ 2019{
2005 ipmi_smi_t intf = m->private; 2020 ipmi_smi_t intf = m->private;
2006 2021
2007 return seq_printf(m, "%u.%u\n", 2022 seq_printf(m, "%u.%u\n",
2008 ipmi_version_major(&intf->bmc->id), 2023 ipmi_version_major(&intf->bmc->id),
2009 ipmi_version_minor(&intf->bmc->id)); 2024 ipmi_version_minor(&intf->bmc->id));
2025
2026 return seq_has_overflowed(m);
2010} 2027}
2011 2028
2012static int smi_version_proc_open(struct inode *inode, struct file *file) 2029static int smi_version_proc_open(struct inode *inode, struct file *file)
@@ -2353,11 +2370,28 @@ static struct attribute *bmc_dev_attrs[] = {
2353 &dev_attr_additional_device_support.attr, 2370 &dev_attr_additional_device_support.attr,
2354 &dev_attr_manufacturer_id.attr, 2371 &dev_attr_manufacturer_id.attr,
2355 &dev_attr_product_id.attr, 2372 &dev_attr_product_id.attr,
2373 &dev_attr_aux_firmware_revision.attr,
2374 &dev_attr_guid.attr,
2356 NULL 2375 NULL
2357}; 2376};
2358 2377
2378static umode_t bmc_dev_attr_is_visible(struct kobject *kobj,
2379 struct attribute *attr, int idx)
2380{
2381 struct device *dev = kobj_to_dev(kobj);
2382 struct bmc_device *bmc = to_bmc_device(dev);
2383 umode_t mode = attr->mode;
2384
2385 if (attr == &dev_attr_aux_firmware_revision.attr)
2386 return bmc->id.aux_firmware_revision_set ? mode : 0;
2387 if (attr == &dev_attr_guid.attr)
2388 return bmc->guid_set ? mode : 0;
2389 return mode;
2390}
2391
2359static struct attribute_group bmc_dev_attr_group = { 2392static struct attribute_group bmc_dev_attr_group = {
2360 .attrs = bmc_dev_attrs, 2393 .attrs = bmc_dev_attrs,
2394 .is_visible = bmc_dev_attr_is_visible,
2361}; 2395};
2362 2396
2363static const struct attribute_group *bmc_dev_attr_groups[] = { 2397static const struct attribute_group *bmc_dev_attr_groups[] = {
@@ -2380,13 +2414,6 @@ cleanup_bmc_device(struct kref *ref)
2380{ 2414{
2381 struct bmc_device *bmc = container_of(ref, struct bmc_device, usecount); 2415 struct bmc_device *bmc = container_of(ref, struct bmc_device, usecount);
2382 2416
2383 if (bmc->id.aux_firmware_revision_set)
2384 device_remove_file(&bmc->pdev.dev,
2385 &dev_attr_aux_firmware_revision);
2386 if (bmc->guid_set)
2387 device_remove_file(&bmc->pdev.dev,
2388 &dev_attr_guid);
2389
2390 platform_device_unregister(&bmc->pdev); 2417 platform_device_unregister(&bmc->pdev);
2391} 2418}
2392 2419
@@ -2407,33 +2434,6 @@ static void ipmi_bmc_unregister(ipmi_smi_t intf)
2407 mutex_unlock(&ipmidriver_mutex); 2434 mutex_unlock(&ipmidriver_mutex);
2408} 2435}
2409 2436
2410static int create_bmc_files(struct bmc_device *bmc)
2411{
2412 int err;
2413
2414 if (bmc->id.aux_firmware_revision_set) {
2415 err = device_create_file(&bmc->pdev.dev,
2416 &dev_attr_aux_firmware_revision);
2417 if (err)
2418 goto out;
2419 }
2420 if (bmc->guid_set) {
2421 err = device_create_file(&bmc->pdev.dev,
2422 &dev_attr_guid);
2423 if (err)
2424 goto out_aux_firm;
2425 }
2426
2427 return 0;
2428
2429out_aux_firm:
2430 if (bmc->id.aux_firmware_revision_set)
2431 device_remove_file(&bmc->pdev.dev,
2432 &dev_attr_aux_firmware_revision);
2433out:
2434 return err;
2435}
2436
2437static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum) 2437static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum)
2438{ 2438{
2439 int rv; 2439 int rv;
@@ -2522,15 +2522,6 @@ static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum)
2522 return rv; 2522 return rv;
2523 } 2523 }
2524 2524
2525 rv = create_bmc_files(bmc);
2526 if (rv) {
2527 mutex_lock(&ipmidriver_mutex);
2528 platform_device_unregister(&bmc->pdev);
2529 mutex_unlock(&ipmidriver_mutex);
2530
2531 return rv;
2532 }
2533
2534 dev_info(intf->si_dev, "Found new BMC (man_id: 0x%6.6x, " 2525 dev_info(intf->si_dev, "Found new BMC (man_id: 0x%6.6x, "
2535 "prod_id: 0x%4.4x, dev_id: 0x%2.2x)\n", 2526 "prod_id: 0x%4.4x, dev_id: 0x%2.2x)\n",
2536 bmc->id.manufacturer_id, 2527 bmc->id.manufacturer_id,
@@ -4212,7 +4203,6 @@ static void need_waiter(ipmi_smi_t intf)
4212static atomic_t smi_msg_inuse_count = ATOMIC_INIT(0); 4203static atomic_t smi_msg_inuse_count = ATOMIC_INIT(0);
4213static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0); 4204static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0);
4214 4205
4215/* FIXME - convert these to slabs. */
4216static void free_smi_msg(struct ipmi_smi_msg *msg) 4206static void free_smi_msg(struct ipmi_smi_msg *msg)
4217{ 4207{
4218 atomic_dec(&smi_msg_inuse_count); 4208 atomic_dec(&smi_msg_inuse_count);
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 967b73aa4e66..f6646ed3047e 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -321,6 +321,18 @@ static int try_smi_init(struct smi_info *smi);
321static void cleanup_one_si(struct smi_info *to_clean); 321static void cleanup_one_si(struct smi_info *to_clean);
322static void cleanup_ipmi_si(void); 322static void cleanup_ipmi_si(void);
323 323
324#ifdef DEBUG_TIMING
325void debug_timestamp(char *msg)
326{
327 struct timespec64 t;
328
329 getnstimeofday64(&t);
330 pr_debug("**%s: %lld.%9.9ld\n", msg, (long long) t.tv_sec, t.tv_nsec);
331}
332#else
333#define debug_timestamp(x)
334#endif
335
324static ATOMIC_NOTIFIER_HEAD(xaction_notifier_list); 336static ATOMIC_NOTIFIER_HEAD(xaction_notifier_list);
325static int register_xaction_notifier(struct notifier_block *nb) 337static int register_xaction_notifier(struct notifier_block *nb)
326{ 338{
@@ -358,9 +370,6 @@ static void return_hosed_msg(struct smi_info *smi_info, int cCode)
358static enum si_sm_result start_next_msg(struct smi_info *smi_info) 370static enum si_sm_result start_next_msg(struct smi_info *smi_info)
359{ 371{
360 int rv; 372 int rv;
361#ifdef DEBUG_TIMING
362 struct timeval t;
363#endif
364 373
365 if (!smi_info->waiting_msg) { 374 if (!smi_info->waiting_msg) {
366 smi_info->curr_msg = NULL; 375 smi_info->curr_msg = NULL;
@@ -370,10 +379,7 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
370 379
371 smi_info->curr_msg = smi_info->waiting_msg; 380 smi_info->curr_msg = smi_info->waiting_msg;
372 smi_info->waiting_msg = NULL; 381 smi_info->waiting_msg = NULL;
373#ifdef DEBUG_TIMING 382 debug_timestamp("Start2");
374 do_gettimeofday(&t);
375 printk(KERN_DEBUG "**Start2: %d.%9.9d\n", t.tv_sec, t.tv_usec);
376#endif
377 err = atomic_notifier_call_chain(&xaction_notifier_list, 383 err = atomic_notifier_call_chain(&xaction_notifier_list,
378 0, smi_info); 384 0, smi_info);
379 if (err & NOTIFY_STOP_MASK) { 385 if (err & NOTIFY_STOP_MASK) {
@@ -582,12 +588,8 @@ static void check_bt_irq(struct smi_info *smi_info, bool irq_on)
582static void handle_transaction_done(struct smi_info *smi_info) 588static void handle_transaction_done(struct smi_info *smi_info)
583{ 589{
584 struct ipmi_smi_msg *msg; 590 struct ipmi_smi_msg *msg;
585#ifdef DEBUG_TIMING
586 struct timeval t;
587 591
588 do_gettimeofday(&t); 592 debug_timestamp("Done");
589 printk(KERN_DEBUG "**Done: %d.%9.9d\n", t.tv_sec, t.tv_usec);
590#endif
591 switch (smi_info->si_state) { 593 switch (smi_info->si_state) {
592 case SI_NORMAL: 594 case SI_NORMAL:
593 if (!smi_info->curr_msg) 595 if (!smi_info->curr_msg)
@@ -929,24 +931,15 @@ static void sender(void *send_info,
929 struct smi_info *smi_info = send_info; 931 struct smi_info *smi_info = send_info;
930 enum si_sm_result result; 932 enum si_sm_result result;
931 unsigned long flags; 933 unsigned long flags;
932#ifdef DEBUG_TIMING
933 struct timeval t;
934#endif
935
936 BUG_ON(smi_info->waiting_msg);
937 smi_info->waiting_msg = msg;
938 934
939#ifdef DEBUG_TIMING 935 debug_timestamp("Enqueue");
940 do_gettimeofday(&t);
941 printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec);
942#endif
943 936
944 if (smi_info->run_to_completion) { 937 if (smi_info->run_to_completion) {
945 /* 938 /*
946 * If we are running to completion, start it and run 939 * If we are running to completion, start it and run
947 * transactions until everything is clear. 940 * transactions until everything is clear.
948 */ 941 */
949 smi_info->curr_msg = smi_info->waiting_msg; 942 smi_info->curr_msg = msg;
950 smi_info->waiting_msg = NULL; 943 smi_info->waiting_msg = NULL;
951 944
952 /* 945 /*
@@ -964,6 +957,15 @@ static void sender(void *send_info,
964 } 957 }
965 958
966 spin_lock_irqsave(&smi_info->si_lock, flags); 959 spin_lock_irqsave(&smi_info->si_lock, flags);
960 /*
961 * The following two lines don't need to be under the lock for
962 * the lock's sake, but they do need SMP memory barriers to
963 * avoid getting things out of order. We are already claiming
964 * the lock, anyway, so just do it under the lock to avoid the
965 * ordering problem.
966 */
967 BUG_ON(smi_info->waiting_msg);
968 smi_info->waiting_msg = msg;
967 check_start_timer_thread(smi_info); 969 check_start_timer_thread(smi_info);
968 spin_unlock_irqrestore(&smi_info->si_lock, flags); 970 spin_unlock_irqrestore(&smi_info->si_lock, flags);
969} 971}
@@ -989,18 +991,18 @@ static void set_run_to_completion(void *send_info, bool i_run_to_completion)
989 * we are spinning in kipmid looking for something and not delaying 991 * we are spinning in kipmid looking for something and not delaying
990 * between checks 992 * between checks
991 */ 993 */
992static inline void ipmi_si_set_not_busy(struct timespec *ts) 994static inline void ipmi_si_set_not_busy(struct timespec64 *ts)
993{ 995{
994 ts->tv_nsec = -1; 996 ts->tv_nsec = -1;
995} 997}
996static inline int ipmi_si_is_busy(struct timespec *ts) 998static inline int ipmi_si_is_busy(struct timespec64 *ts)
997{ 999{
998 return ts->tv_nsec != -1; 1000 return ts->tv_nsec != -1;
999} 1001}
1000 1002
1001static inline int ipmi_thread_busy_wait(enum si_sm_result smi_result, 1003static inline int ipmi_thread_busy_wait(enum si_sm_result smi_result,
1002 const struct smi_info *smi_info, 1004 const struct smi_info *smi_info,
1003 struct timespec *busy_until) 1005 struct timespec64 *busy_until)
1004{ 1006{
1005 unsigned int max_busy_us = 0; 1007 unsigned int max_busy_us = 0;
1006 1008
@@ -1009,12 +1011,13 @@ static inline int ipmi_thread_busy_wait(enum si_sm_result smi_result,
1009 if (max_busy_us == 0 || smi_result != SI_SM_CALL_WITH_DELAY) 1011 if (max_busy_us == 0 || smi_result != SI_SM_CALL_WITH_DELAY)
1010 ipmi_si_set_not_busy(busy_until); 1012 ipmi_si_set_not_busy(busy_until);
1011 else if (!ipmi_si_is_busy(busy_until)) { 1013 else if (!ipmi_si_is_busy(busy_until)) {
1012 getnstimeofday(busy_until); 1014 getnstimeofday64(busy_until);
1013 timespec_add_ns(busy_until, max_busy_us*NSEC_PER_USEC); 1015 timespec64_add_ns(busy_until, max_busy_us*NSEC_PER_USEC);
1014 } else { 1016 } else {
1015 struct timespec now; 1017 struct timespec64 now;
1016 getnstimeofday(&now); 1018
1017 if (unlikely(timespec_compare(&now, busy_until) > 0)) { 1019 getnstimeofday64(&now);
1020 if (unlikely(timespec64_compare(&now, busy_until) > 0)) {
1018 ipmi_si_set_not_busy(busy_until); 1021 ipmi_si_set_not_busy(busy_until);
1019 return 0; 1022 return 0;
1020 } 1023 }
@@ -1037,7 +1040,7 @@ static int ipmi_thread(void *data)
1037 struct smi_info *smi_info = data; 1040 struct smi_info *smi_info = data;
1038 unsigned long flags; 1041 unsigned long flags;
1039 enum si_sm_result smi_result; 1042 enum si_sm_result smi_result;
1040 struct timespec busy_until; 1043 struct timespec64 busy_until;
1041 1044
1042 ipmi_si_set_not_busy(&busy_until); 1045 ipmi_si_set_not_busy(&busy_until);
1043 set_user_nice(current, MAX_NICE); 1046 set_user_nice(current, MAX_NICE);
@@ -1128,15 +1131,10 @@ static void smi_timeout(unsigned long data)
1128 unsigned long jiffies_now; 1131 unsigned long jiffies_now;
1129 long time_diff; 1132 long time_diff;
1130 long timeout; 1133 long timeout;
1131#ifdef DEBUG_TIMING
1132 struct timeval t;
1133#endif
1134 1134
1135 spin_lock_irqsave(&(smi_info->si_lock), flags); 1135 spin_lock_irqsave(&(smi_info->si_lock), flags);
1136#ifdef DEBUG_TIMING 1136 debug_timestamp("Timer");
1137 do_gettimeofday(&t); 1137
1138 printk(KERN_DEBUG "**Timer: %d.%9.9d\n", t.tv_sec, t.tv_usec);
1139#endif
1140 jiffies_now = jiffies; 1138 jiffies_now = jiffies;
1141 time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies) 1139 time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies)
1142 * SI_USEC_PER_JIFFY); 1140 * SI_USEC_PER_JIFFY);
@@ -1173,18 +1171,13 @@ static irqreturn_t si_irq_handler(int irq, void *data)
1173{ 1171{
1174 struct smi_info *smi_info = data; 1172 struct smi_info *smi_info = data;
1175 unsigned long flags; 1173 unsigned long flags;
1176#ifdef DEBUG_TIMING
1177 struct timeval t;
1178#endif
1179 1174
1180 spin_lock_irqsave(&(smi_info->si_lock), flags); 1175 spin_lock_irqsave(&(smi_info->si_lock), flags);
1181 1176
1182 smi_inc_stat(smi_info, interrupts); 1177 smi_inc_stat(smi_info, interrupts);
1183 1178
1184#ifdef DEBUG_TIMING 1179 debug_timestamp("Interrupt");
1185 do_gettimeofday(&t); 1180
1186 printk(KERN_DEBUG "**Interrupt: %d.%9.9d\n", t.tv_sec, t.tv_usec);
1187#endif
1188 smi_event_handler(smi_info, 0); 1181 smi_event_handler(smi_info, 0);
1189 spin_unlock_irqrestore(&(smi_info->si_lock), flags); 1182 spin_unlock_irqrestore(&(smi_info->si_lock), flags);
1190 return IRQ_HANDLED; 1183 return IRQ_HANDLED;
@@ -2038,18 +2031,13 @@ static u32 ipmi_acpi_gpe(acpi_handle gpe_device,
2038{ 2031{
2039 struct smi_info *smi_info = context; 2032 struct smi_info *smi_info = context;
2040 unsigned long flags; 2033 unsigned long flags;
2041#ifdef DEBUG_TIMING
2042 struct timeval t;
2043#endif
2044 2034
2045 spin_lock_irqsave(&(smi_info->si_lock), flags); 2035 spin_lock_irqsave(&(smi_info->si_lock), flags);
2046 2036
2047 smi_inc_stat(smi_info, interrupts); 2037 smi_inc_stat(smi_info, interrupts);
2048 2038
2049#ifdef DEBUG_TIMING 2039 debug_timestamp("ACPI_GPE");
2050 do_gettimeofday(&t); 2040
2051 printk("**ACPI_GPE: %d.%9.9d\n", t.tv_sec, t.tv_usec);
2052#endif
2053 smi_event_handler(smi_info, 0); 2041 smi_event_handler(smi_info, 0);
2054 spin_unlock_irqrestore(&(smi_info->si_lock), flags); 2042 spin_unlock_irqrestore(&(smi_info->si_lock), flags);
2055 2043
@@ -2071,7 +2059,6 @@ static int acpi_gpe_irq_setup(struct smi_info *info)
2071 if (!info->irq) 2059 if (!info->irq)
2072 return 0; 2060 return 0;
2073 2061
2074 /* FIXME - is level triggered right? */
2075 status = acpi_install_gpe_handler(NULL, 2062 status = acpi_install_gpe_handler(NULL,
2076 info->irq, 2063 info->irq,
2077 ACPI_GPE_LEVEL_TRIGGERED, 2064 ACPI_GPE_LEVEL_TRIGGERED,
@@ -2998,7 +2985,9 @@ static int smi_type_proc_show(struct seq_file *m, void *v)
2998{ 2985{
2999 struct smi_info *smi = m->private; 2986 struct smi_info *smi = m->private;
3000 2987
3001 return seq_printf(m, "%s\n", si_to_str[smi->si_type]); 2988 seq_printf(m, "%s\n", si_to_str[smi->si_type]);
2989
2990 return seq_has_overflowed(m);
3002} 2991}
3003 2992
3004static int smi_type_proc_open(struct inode *inode, struct file *file) 2993static int smi_type_proc_open(struct inode *inode, struct file *file)
@@ -3060,16 +3049,18 @@ static int smi_params_proc_show(struct seq_file *m, void *v)
3060{ 3049{
3061 struct smi_info *smi = m->private; 3050 struct smi_info *smi = m->private;
3062 3051
3063 return seq_printf(m, 3052 seq_printf(m,
3064 "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n", 3053 "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n",
3065 si_to_str[smi->si_type], 3054 si_to_str[smi->si_type],
3066 addr_space_to_str[smi->io.addr_type], 3055 addr_space_to_str[smi->io.addr_type],
3067 smi->io.addr_data, 3056 smi->io.addr_data,
3068 smi->io.regspacing, 3057 smi->io.regspacing,
3069 smi->io.regsize, 3058 smi->io.regsize,
3070 smi->io.regshift, 3059 smi->io.regshift,
3071 smi->irq, 3060 smi->irq,
3072 smi->slave_addr); 3061 smi->slave_addr);
3062
3063 return seq_has_overflowed(m);
3073} 3064}
3074 3065
3075static int smi_params_proc_open(struct inode *inode, struct file *file) 3066static int smi_params_proc_open(struct inode *inode, struct file *file)
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 982b96323f82..f6e378dac5f5 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1097,8 +1097,6 @@ static int ssif_remove(struct i2c_client *client)
1097 if (!ssif_info) 1097 if (!ssif_info)
1098 return 0; 1098 return 0;
1099 1099
1100 i2c_set_clientdata(client, NULL);
1101
1102 /* 1100 /*
1103 * After this point, we won't deliver anything asynchronously 1101 * After this point, we won't deliver anything asynchronously
1104 * to the message handler. We can unregister ourself. 1102 * to the message handler. We can unregister ourself.
1104 * to the message handler. We can unregister ourself. 1102 * to the message handler. We can unregister ourself.
@@ -1198,7 +1196,9 @@ static int ssif_detect(struct i2c_client *client, struct i2c_board_info *info)
1198 1196
1199static int smi_type_proc_show(struct seq_file *m, void *v) 1197static int smi_type_proc_show(struct seq_file *m, void *v)
1200{ 1198{
1201 return seq_puts(m, "ssif\n"); 1199 seq_puts(m, "ssif\n");
1200
1201 return seq_has_overflowed(m);
1202} 1202}
1203 1203
1204static int smi_type_proc_open(struct inode *inode, struct file *file) 1204static int smi_type_proc_open(struct inode *inode, struct file *file)
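The pattern repeated across these IPMI proc handlers deserves a note: seq_printf(), seq_puts() and friends no longer return a useful value, so a show routine now prints unconditionally and reports overflow separately via seq_has_overflowed(). A minimal sketch of the new idiom (foo_proc_show and the printed value are hypothetical):

#include <linux/seq_file.h>

static int foo_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m, "state=%d\n", 42);

	/* Report overflow; the seq_file core retries with a larger buffer. */
	return seq_has_overflowed(m);
}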
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index af5d63c7cc53..2fe195002021 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -75,29 +75,25 @@ efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
75 unsigned long key; 75 unsigned long key;
76 u32 desc_version; 76 u32 desc_version;
77 77
78 *map_size = 0; 78 *map_size = sizeof(*m) * 32;
79 *desc_size = 0; 79again:
80 key = 0;
81 status = efi_call_early(get_memory_map, map_size, NULL,
82 &key, desc_size, &desc_version);
83 if (status != EFI_BUFFER_TOO_SMALL)
84 return EFI_LOAD_ERROR;
85
86 /* 80 /*
87 * Add an additional efi_memory_desc_t because we're doing an 81 * Add an additional efi_memory_desc_t because we're doing an
88 * allocation which may be in a new descriptor region. 82 * allocation which may be in a new descriptor region.
89 */ 83 */
90 *map_size += *desc_size; 84 *map_size += sizeof(*m);
91 status = efi_call_early(allocate_pool, EFI_LOADER_DATA, 85 status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
92 *map_size, (void **)&m); 86 *map_size, (void **)&m);
93 if (status != EFI_SUCCESS) 87 if (status != EFI_SUCCESS)
94 goto fail; 88 goto fail;
95 89
90 *desc_size = 0;
91 key = 0;
96 status = efi_call_early(get_memory_map, map_size, m, 92 status = efi_call_early(get_memory_map, map_size, m,
97 &key, desc_size, &desc_version); 93 &key, desc_size, &desc_version);
98 if (status == EFI_BUFFER_TOO_SMALL) { 94 if (status == EFI_BUFFER_TOO_SMALL) {
99 efi_call_early(free_pool, m); 95 efi_call_early(free_pool, m);
100 return EFI_LOAD_ERROR; 96 goto again;
101 } 97 }
102 98
103 if (status != EFI_SUCCESS) 99 if (status != EFI_SUCCESS)
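The rework above replaces a probe-then-allocate sequence with a grow-and-retry loop: start from a guessed size, always leave slack for the descriptor the allocation itself may add, and loop back whenever the firmware still reports EFI_BUFFER_TOO_SMALL (get_memory_map() writes the required size back through map_size, which can grow between calls). The same shape, modelled as self-contained C with a simulated firmware call — query_map() and its growth behaviour are invented for illustration:

#include <stdio.h>
#include <stdlib.h>

static size_t required = 200;		/* simulated firmware state */

/* Like get_memory_map(): fails and reports the needed size, which may
 * have grown since the last call. */
static int query_map(size_t *size, char *buf)
{
	if (*size < required) {
		*size = required;
		required += 16;		/* the map keeps changing */
		return -1;		/* EFI_BUFFER_TOO_SMALL */
	}
	return 0;
}

int main(void)
{
	size_t size = 32;		/* initial guess */
	char *buf;

	for (;;) {
		size += 16;		/* slack for the new allocation */
		buf = malloc(size);
		if (!buf)
			return 1;
		if (query_map(&size, buf) == 0)
			break;
		free(buf);		/* stale buffer; retry */
	}
	printf("map fits in %zu bytes\n", size);
	free(buf);
	return 0;
}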
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 638e797037da..97527614141b 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -735,6 +735,31 @@ config INTEL_IPS
735 functionality. If in doubt, say Y here; it will only load on 735 functionality. If in doubt, say Y here; it will only load on
736 supported platforms. 736 supported platforms.
737 737
738config INTEL_IMR
739 bool "Intel Isolated Memory Region support"
740 default n
741 depends on X86_INTEL_QUARK && IOSF_MBI
742 ---help---
743 This option provides a means to manipulate Isolated Memory Regions.
744 IMRs are a set of registers that define read and write access masks
745 to prohibit certain system agents from accessing memory with 1 KiB
746 granularity.
747
748 IMRs make it possible to control read/write access to an address
749 by hardware agents inside the SoC. Read and write masks can be
750 defined for:
751 - eSRAM flush
752 - Dirty CPU snoop (write only)
753 - RMU access
754 - PCI Virtual Channel 0/Virtual Channel 1
755 - SMM mode
756 - Non-SMM mode
757
758 Quark contains a set of eight IMR registers and makes use of those
759 registers during its bootup process.
760
761 If you are running on a Galileo/Quark, say Y here.
762
738config IBM_RTL 763config IBM_RTL
739 tristate "Device driver to enable PRTL support" 764 tristate "Device driver to enable PRTL support"
740 depends on X86 && PCI 765 depends on X86 && PCI
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 02b16910f4c9..995986b8e36b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -645,11 +645,12 @@ out:
645 645
646static unsigned long randomize_stack_top(unsigned long stack_top) 646static unsigned long randomize_stack_top(unsigned long stack_top)
647{ 647{
648 unsigned int random_variable = 0; 648 unsigned long random_variable = 0;
649 649
650 if ((current->flags & PF_RANDOMIZE) && 650 if ((current->flags & PF_RANDOMIZE) &&
651 !(current->personality & ADDR_NO_RANDOMIZE)) { 651 !(current->personality & ADDR_NO_RANDOMIZE)) {
652 random_variable = get_random_int() & STACK_RND_MASK; 652 random_variable = (unsigned long) get_random_int();
653 random_variable &= STACK_RND_MASK;
653 random_variable <<= PAGE_SHIFT; 654 random_variable <<= PAGE_SHIFT;
654 } 655 }
655#ifdef CONFIG_STACK_GROWSUP 656#ifdef CONFIG_STACK_GROWSUP
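The binfmt_elf fix above is about integer width, not randomness. With x86_64 values (STACK_RND_MASK of 0x3fffff and PAGE_SHIFT of 12, both assumed here), the masked value needs 22 bits and the shift pushes the result to 34 bits, so a 32-bit intermediate silently wraps and discards the top entropy bits. A self-contained demonstration (assumes an LP64 host):

#include <stdio.h>

int main(void)
{
	unsigned int  r32 = 0x3fffff;	/* max masked random value */
	unsigned long r64 = 0x3fffff;

	/* Wraps to 0xfffff000: only the low 20 random bits survive. */
	printf("32-bit: %#lx\n", (unsigned long)(r32 << 12));
	/* Full 34-bit offset range is preserved. */
	printf("64-bit: %#lx\n", r64 << 12);
	return 0;
}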
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d1ec10a940ff..1b45e4a0519b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -202,7 +202,7 @@ static __always_inline void data_access_exceeds_word_size(void)
202{ 202{
203} 203}
204 204
205static __always_inline void __read_once_size(volatile void *p, void *res, int size) 205static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
206{ 206{
207 switch (size) { 207 switch (size) {
208 case 1: *(__u8 *)res = *(volatile __u8 *)p; break; 208 case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
@@ -259,10 +259,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
259 */ 259 */
260 260
261#define READ_ONCE(x) \ 261#define READ_ONCE(x) \
262 ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; }) 262 ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
263 263
264#define WRITE_ONCE(x, val) \ 264#define WRITE_ONCE(x, val) \
265 ({ typeof(x) __val; __val = val; __write_once_size(&x, &__val, sizeof(__val)); __val; }) 265 ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
266 266
267#endif /* __KERNEL__ */ 267#endif /* __KERNEL__ */
268 268
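The union in the new READ_ONCE() is worth spelling out: pairing typeof(x) with a char array gives the macro writable, byte-addressable storage even when x is const-qualified (the old "typeof(x) __val" could not legally be written to), and &(x) keeps the macro safe for arguments containing operators. A self-contained model of just that type-handling trick — GCC/Clang extensions assumed, and the volatile-access machinery of the real macro deliberately omitted:

#include <stdio.h>
#include <string.h>

#define MY_READ_ONCE(x) ({						\
	union { __typeof__(x) __val; char __c[1]; } __u;		\
	memcpy(__u.__c, (const void *)&(x), sizeof(x));			\
	__u.__val; })

int main(void)
{
	const int v = 42;	/* const source: the old macro broke here */
	printf("%d\n", MY_READ_ONCE(v));
	return 0;
}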
diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 75ae2e2631fc..a19bcf9e762e 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -156,8 +156,14 @@ typedef enum {
156 KDB_REASON_SYSTEM_NMI, /* In NMI due to SYSTEM cmd; regs valid */ 156 KDB_REASON_SYSTEM_NMI, /* In NMI due to SYSTEM cmd; regs valid */
157} kdb_reason_t; 157} kdb_reason_t;
158 158
159enum kdb_msgsrc {
160 KDB_MSGSRC_INTERNAL, /* direct call to kdb_printf() */
161 KDB_MSGSRC_PRINTK, /* trapped from printk() */
162};
163
159extern int kdb_trap_printk; 164extern int kdb_trap_printk;
160extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args); 165extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt,
166 va_list args);
161extern __printf(1, 2) int kdb_printf(const char *, ...); 167extern __printf(1, 2) int kdb_printf(const char *, ...);
162typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); 168typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
163 169
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 41c60e5302d7..6d77432e14ff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -363,9 +363,6 @@ extern void show_regs(struct pt_regs *);
363 */ 363 */
364extern void show_stack(struct task_struct *task, unsigned long *sp); 364extern void show_stack(struct task_struct *task, unsigned long *sp);
365 365
366void io_schedule(void);
367long io_schedule_timeout(long timeout);
368
369extern void cpu_init (void); 366extern void cpu_init (void);
370extern void trap_init(void); 367extern void trap_init(void);
371extern void update_process_times(int user); 368extern void update_process_times(int user);
@@ -422,6 +419,13 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout);
422asmlinkage void schedule(void); 419asmlinkage void schedule(void);
423extern void schedule_preempt_disabled(void); 420extern void schedule_preempt_disabled(void);
424 421
422extern long io_schedule_timeout(long timeout);
423
424static inline void io_schedule(void)
425{
426 io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
427}
428
425struct nsproxy; 429struct nsproxy;
426struct user_namespace; 430struct user_namespace;
427 431
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 07ce18ca71e0..0874e2edd275 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -604,7 +604,7 @@ return_normal:
604 online_cpus) 604 online_cpus)
605 cpu_relax(); 605 cpu_relax();
606 if (!time_left) 606 if (!time_left)
607 pr_crit("KGDB: Timed out waiting for secondary CPUs.\n"); 607 pr_crit("Timed out waiting for secondary CPUs.\n");
608 608
609 /* 609 /*
610 * At this point the primary processor is completely 610 * At this point the primary processor is completely
@@ -696,6 +696,14 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
696 696
697 if (arch_kgdb_ops.enable_nmi) 697 if (arch_kgdb_ops.enable_nmi)
698 arch_kgdb_ops.enable_nmi(0); 698 arch_kgdb_ops.enable_nmi(0);
699 /*
700 * Avoid entering the debugger if we were triggered due to an oops
701 * but panic_timeout indicates the system should automatically
702 * reboot on panic. We don't want to get stuck waiting for input
703 * on such systems, especially if it's "just" an oops.
704 */
705 if (signo != SIGTRAP && panic_timeout)
706 return 1;
699 707
700 memset(ks, 0, sizeof(struct kgdb_state)); 708 memset(ks, 0, sizeof(struct kgdb_state));
701 ks->cpu = raw_smp_processor_id(); 709 ks->cpu = raw_smp_processor_id();
@@ -828,6 +836,15 @@ static int kgdb_panic_event(struct notifier_block *self,
828 unsigned long val, 836 unsigned long val,
829 void *data) 837 void *data)
830{ 838{
839 /*
840 * Avoid entering the debugger if we were triggered due to a panic.
841 * We don't want to get stuck waiting for input from the user in such a case.
842 * panic_timeout indicates the system should automatically
843 * reboot on panic.
844 */
845 if (panic_timeout)
846 return NOTIFY_DONE;
847
831 if (dbg_kdb_mode) 848 if (dbg_kdb_mode)
832 kdb_printf("PANIC: %s\n", (char *)data); 849 kdb_printf("PANIC: %s\n", (char *)data);
833 kgdb_breakpoint(); 850 kgdb_breakpoint();
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 7c70812caea5..fc1ef736253c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -439,7 +439,7 @@ poll_again:
439 * substituted for %d, %x or %o in the prompt. 439 * substituted for %d, %x or %o in the prompt.
440 */ 440 */
441 441
442char *kdb_getstr(char *buffer, size_t bufsize, char *prompt) 442char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt)
443{ 443{
444 if (prompt && kdb_prompt_str != prompt) 444 if (prompt && kdb_prompt_str != prompt)
445 strncpy(kdb_prompt_str, prompt, CMD_BUFLEN); 445 strncpy(kdb_prompt_str, prompt, CMD_BUFLEN);
@@ -548,7 +548,7 @@ static int kdb_search_string(char *searched, char *searchfor)
548 return 0; 548 return 0;
549} 549}
550 550
551int vkdb_printf(const char *fmt, va_list ap) 551int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
552{ 552{
553 int diag; 553 int diag;
554 int linecount; 554 int linecount;
@@ -680,6 +680,12 @@ int vkdb_printf(const char *fmt, va_list ap)
680 size_avail = sizeof(kdb_buffer) - len; 680 size_avail = sizeof(kdb_buffer) - len;
681 goto kdb_print_out; 681 goto kdb_print_out;
682 } 682 }
683 if (kdb_grepping_flag >= KDB_GREPPING_FLAG_SEARCH)
684 /*
685 * This was an interactive search (using '/' at the more
686 * prompt) and it has completed. Clear the flag.
687 */
688 kdb_grepping_flag = 0;
683 /* 689 /*
684 * at this point the string is a full line and 690 * at this point the string is a full line and
685 * should be printed, up to the null. 691 * should be printed, up to the null.
@@ -691,19 +697,20 @@ kdb_printit:
691 * Write to all consoles. 697 * Write to all consoles.
692 */ 698 */
693 retlen = strlen(kdb_buffer); 699 retlen = strlen(kdb_buffer);
700 cp = (char *) printk_skip_level(kdb_buffer);
694 if (!dbg_kdb_mode && kgdb_connected) { 701 if (!dbg_kdb_mode && kgdb_connected) {
695 gdbstub_msg_write(kdb_buffer, retlen); 702 gdbstub_msg_write(cp, retlen - (cp - kdb_buffer));
696 } else { 703 } else {
697 if (dbg_io_ops && !dbg_io_ops->is_console) { 704 if (dbg_io_ops && !dbg_io_ops->is_console) {
698 len = retlen; 705 len = retlen - (cp - kdb_buffer);
699 cp = kdb_buffer; 706 cp2 = cp;
700 while (len--) { 707 while (len--) {
701 dbg_io_ops->write_char(*cp); 708 dbg_io_ops->write_char(*cp2);
702 cp++; 709 cp2++;
703 } 710 }
704 } 711 }
705 while (c) { 712 while (c) {
706 c->write(c, kdb_buffer, retlen); 713 c->write(c, cp, retlen - (cp - kdb_buffer));
707 touch_nmi_watchdog(); 714 touch_nmi_watchdog();
708 c = c->next; 715 c = c->next;
709 } 716 }
@@ -711,7 +718,10 @@ kdb_printit:
711 if (logging) { 718 if (logging) {
712 saved_loglevel = console_loglevel; 719 saved_loglevel = console_loglevel;
713 console_loglevel = CONSOLE_LOGLEVEL_SILENT; 720 console_loglevel = CONSOLE_LOGLEVEL_SILENT;
714 printk(KERN_INFO "%s", kdb_buffer); 721 if (printk_get_level(kdb_buffer) || src == KDB_MSGSRC_PRINTK)
722 printk("%s", kdb_buffer);
723 else
724 pr_info("%s", kdb_buffer);
715 } 725 }
716 726
717 if (KDB_STATE(PAGER)) { 727 if (KDB_STATE(PAGER)) {
@@ -794,11 +804,23 @@ kdb_printit:
794 kdb_nextline = linecount - 1; 804 kdb_nextline = linecount - 1;
795 kdb_printf("\r"); 805 kdb_printf("\r");
796 suspend_grep = 1; /* for this recursion */ 806 suspend_grep = 1; /* for this recursion */
807 } else if (buf1[0] == '/' && !kdb_grepping_flag) {
808 kdb_printf("\r");
809 kdb_getstr(kdb_grep_string, KDB_GREP_STRLEN,
810 kdbgetenv("SEARCHPROMPT") ?: "search> ");
811 *strchrnul(kdb_grep_string, '\n') = '\0';
812 kdb_grepping_flag += KDB_GREPPING_FLAG_SEARCH;
813 suspend_grep = 1; /* for this recursion */
797 } else if (buf1[0] && buf1[0] != '\n') { 814 } else if (buf1[0] && buf1[0] != '\n') {
798 /* user hit something other than enter */ 815 /* user hit something other than enter */
799 suspend_grep = 1; /* for this recursion */ 816 suspend_grep = 1; /* for this recursion */
800 kdb_printf("\nOnly 'q' or 'Q' are processed at more " 817 if (buf1[0] != '/')
801 "prompt, input ignored\n"); 818 kdb_printf(
819 "\nOnly 'q', 'Q' or '/' are processed at "
820 "more prompt, input ignored\n");
821 else
822 kdb_printf("\n'/' cannot be used during | "
823 "grep filtering, input ignored\n");
802 } else if (kdb_grepping_flag) { 824 } else if (kdb_grepping_flag) {
803 /* user hit enter */ 825 /* user hit enter */
804 suspend_grep = 1; /* for this recursion */ 826 suspend_grep = 1; /* for this recursion */
@@ -844,7 +866,7 @@ int kdb_printf(const char *fmt, ...)
844 int r; 866 int r;
845 867
846 va_start(ap, fmt); 868 va_start(ap, fmt);
847 r = vkdb_printf(fmt, ap); 869 r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
848 va_end(ap); 870 va_end(ap);
849 871
850 return r; 872 return r;
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 7b40c5f07dce..4121345498e0 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -50,8 +50,7 @@
50static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE; 50static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE;
51module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); 51module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600);
52 52
53#define GREP_LEN 256 53char kdb_grep_string[KDB_GREP_STRLEN];
54char kdb_grep_string[GREP_LEN];
55int kdb_grepping_flag; 54int kdb_grepping_flag;
56EXPORT_SYMBOL(kdb_grepping_flag); 55EXPORT_SYMBOL(kdb_grepping_flag);
57int kdb_grep_leading; 56int kdb_grep_leading;
@@ -870,7 +869,7 @@ static void parse_grep(const char *str)
870 len = strlen(cp); 869 len = strlen(cp);
871 if (!len) 870 if (!len)
872 return; 871 return;
873 if (len >= GREP_LEN) { 872 if (len >= KDB_GREP_STRLEN) {
874 kdb_printf("search string too long\n"); 873 kdb_printf("search string too long\n");
875 return; 874 return;
876 } 875 }
@@ -915,13 +914,12 @@ int kdb_parse(const char *cmdstr)
915 char *cp; 914 char *cp;
916 char *cpp, quoted; 915 char *cpp, quoted;
917 kdbtab_t *tp; 916 kdbtab_t *tp;
918 int i, escaped, ignore_errors = 0, check_grep; 917 int i, escaped, ignore_errors = 0, check_grep = 0;
919 918
920 /* 919 /*
921 * First tokenize the command string. 920 * First tokenize the command string.
922 */ 921 */
923 cp = (char *)cmdstr; 922 cp = (char *)cmdstr;
924 kdb_grepping_flag = check_grep = 0;
925 923
926 if (KDB_FLAG(CMD_INTERRUPT)) { 924 if (KDB_FLAG(CMD_INTERRUPT)) {
927 /* Previous command was interrupted, newline must not 925 /* Previous command was interrupted, newline must not
@@ -1247,7 +1245,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
1247 kdb_printf("due to NonMaskable Interrupt @ " 1245 kdb_printf("due to NonMaskable Interrupt @ "
1248 kdb_machreg_fmt "\n", 1246 kdb_machreg_fmt "\n",
1249 instruction_pointer(regs)); 1247 instruction_pointer(regs));
1250 kdb_dumpregs(regs);
1251 break; 1248 break;
1252 case KDB_REASON_SSTEP: 1249 case KDB_REASON_SSTEP:
1253 case KDB_REASON_BREAK: 1250 case KDB_REASON_BREAK:
@@ -1281,6 +1278,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
1281 */ 1278 */
1282 kdb_nextline = 1; 1279 kdb_nextline = 1;
1283 KDB_STATE_CLEAR(SUPPRESS); 1280 KDB_STATE_CLEAR(SUPPRESS);
1281 kdb_grepping_flag = 0;
1282 /* ensure the old search does not leak into '/' commands */
1283 kdb_grep_string[0] = '\0';
1284 1284
1285 cmdbuf = cmd_cur; 1285 cmdbuf = cmd_cur;
1286 *cmdbuf = '\0'; 1286 *cmdbuf = '\0';
@@ -2256,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv)
2256 /* 2256 /*
2257 * Validate cpunum 2257 * Validate cpunum
2258 */ 2258 */
2259 if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb) 2259 if ((cpunum >= CONFIG_NR_CPUS) || !kgdb_info[cpunum].enter_kgdb)
2260 return KDB_BADCPUNUM; 2260 return KDB_BADCPUNUM;
2261 2261
2262 dbg_switch_cpu = cpunum; 2262 dbg_switch_cpu = cpunum;
@@ -2583,7 +2583,7 @@ static int kdb_summary(int argc, const char **argv)
2583#define K(x) ((x) << (PAGE_SHIFT - 10)) 2583#define K(x) ((x) << (PAGE_SHIFT - 10))
2584 kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n" 2584 kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n"
2585 "Buffers: %8lu kB\n", 2585 "Buffers: %8lu kB\n",
2586 val.totalram, val.freeram, val.bufferram); 2586 K(val.totalram), K(val.freeram), K(val.bufferram));
2587 return 0; 2587 return 0;
2588} 2588}
2589 2589
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index eaacd1693954..75014d7f4568 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -196,7 +196,9 @@ extern int kdb_main_loop(kdb_reason_t, kdb_reason_t,
196 196
197/* Miscellaneous functions and data areas */ 197/* Miscellaneous functions and data areas */
198extern int kdb_grepping_flag; 198extern int kdb_grepping_flag;
199#define KDB_GREPPING_FLAG_SEARCH 0x8000
199extern char kdb_grep_string[]; 200extern char kdb_grep_string[];
201#define KDB_GREP_STRLEN 256
200extern int kdb_grep_leading; 202extern int kdb_grep_leading;
201extern int kdb_grep_trailing; 203extern int kdb_grep_trailing;
202extern char *kdb_cmds[]; 204extern char *kdb_cmds[];
@@ -209,7 +211,7 @@ extern void kdb_ps1(const struct task_struct *p);
209extern void kdb_print_nameval(const char *name, unsigned long val); 211extern void kdb_print_nameval(const char *name, unsigned long val);
210extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); 212extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
211extern void kdb_meminfo_proc_show(void); 213extern void kdb_meminfo_proc_show(void);
212extern char *kdb_getstr(char *, size_t, char *); 214extern char *kdb_getstr(char *, size_t, const char *);
213extern void kdb_gdb_state_pass(char *buf); 215extern void kdb_gdb_state_pass(char *buf);
214 216
215/* Defines for kdb_symbol_print */ 217/* Defines for kdb_symbol_print */
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3059bc2f022d..e16e5542bf13 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1193,7 +1193,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
1193 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); 1193 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
1194 1194
1195 if (unlikely(ret)) { 1195 if (unlikely(ret)) {
1196 remove_waiter(lock, &waiter); 1196 if (rt_mutex_has_waiters(lock))
1197 remove_waiter(lock, &waiter);
1197 rt_mutex_handle_deadlock(ret, chwalk, &waiter); 1198 rt_mutex_handle_deadlock(ret, chwalk, &waiter);
1198 } 1199 }
1199 1200
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index c06df7de0963..01cfd69c54c6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1811,7 +1811,7 @@ int vprintk_default(const char *fmt, va_list args)
1811 1811
1812#ifdef CONFIG_KGDB_KDB 1812#ifdef CONFIG_KGDB_KDB
1813 if (unlikely(kdb_trap_printk)) { 1813 if (unlikely(kdb_trap_printk)) {
1814 r = vkdb_printf(fmt, args); 1814 r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
1815 return r; 1815 return r;
1816 } 1816 }
1817#endif 1817#endif
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0d7bbe3095ad..0a571e9a0f1d 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -326,6 +326,7 @@ void rcu_read_unlock_special(struct task_struct *t)
326 special = t->rcu_read_unlock_special; 326 special = t->rcu_read_unlock_special;
327 if (special.b.need_qs) { 327 if (special.b.need_qs) {
328 rcu_preempt_qs(); 328 rcu_preempt_qs();
329 t->rcu_read_unlock_special.b.need_qs = false;
329 if (!t->rcu_read_unlock_special.s) { 330 if (!t->rcu_read_unlock_special.s) {
330 local_irq_restore(flags); 331 local_irq_restore(flags);
331 return; 332 return;
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 8a2e230fb86a..eae160dd669d 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -87,8 +87,7 @@ static inline struct autogroup *autogroup_create(void)
87 * so we don't have to move tasks around upon policy change, 87 * so we don't have to move tasks around upon policy change,
88 * or flail around trying to allocate bandwidth on the fly. 88 * or flail around trying to allocate bandwidth on the fly.
89 * A bandwidth exception in __sched_setscheduler() allows 89 * A bandwidth exception in __sched_setscheduler() allows
90 * the policy change to proceed. Thereafter, task_group() 90 * the policy change to proceed.
91 * returns &root_task_group, so zero bandwidth is required.
92 */ 91 */
93 free_rt_sched_group(tg); 92 free_rt_sched_group(tg);
94 tg->rt_se = root_task_group.rt_se; 93 tg->rt_se = root_task_group.rt_se;
@@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
115 if (tg != &root_task_group) 114 if (tg != &root_task_group)
116 return false; 115 return false;
117 116
118 if (p->sched_class != &fair_sched_class)
119 return false;
120
121 /* 117 /*
122 * We can only assume the task group can't go away on us if 118 * We can only assume the task group can't go away on us if
123 * autogroup_move_group() can see us on ->thread_group list. 119 * autogroup_move_group() can see us on ->thread_group list.
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 7052d3fd4e7b..8d0f35debf35 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -274,7 +274,7 @@ bool try_wait_for_completion(struct completion *x)
274 * first without taking the lock so we can 274 * first without taking the lock so we can
275 * return early in the blocking case. 275 * return early in the blocking case.
276 */ 276 */
277 if (!ACCESS_ONCE(x->done)) 277 if (!READ_ONCE(x->done))
278 return 0; 278 return 0;
279 279
280 spin_lock_irqsave(&x->wait.lock, flags); 280 spin_lock_irqsave(&x->wait.lock, flags);
@@ -297,6 +297,21 @@ EXPORT_SYMBOL(try_wait_for_completion);
297 */ 297 */
298bool completion_done(struct completion *x) 298bool completion_done(struct completion *x)
299{ 299{
300 return !!ACCESS_ONCE(x->done); 300 if (!READ_ONCE(x->done))
301 return false;
302
303 /*
304 * If ->done, we need to wait for complete() to release ->wait.lock
305 * otherwise we can end up freeing the completion before complete()
306 * is done referencing it.
307 *
308 * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
309 * the loads of ->done and ->wait.lock such that we cannot observe
310 * the lock before complete() acquires it while observing the ->done
311 * after it's acquired the lock.
312 */
313 smp_rmb();
314 spin_unlock_wait(&x->wait.lock);
315 return true;
301} 316}
302EXPORT_SYMBOL(completion_done); 317EXPORT_SYMBOL(completion_done);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 13049aac05a6..f0f831e8a345 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -307,66 +307,6 @@ __read_mostly int scheduler_running;
307int sysctl_sched_rt_runtime = 950000; 307int sysctl_sched_rt_runtime = 950000;
308 308
309/* 309/*
310 * __task_rq_lock - lock the rq @p resides on.
311 */
312static inline struct rq *__task_rq_lock(struct task_struct *p)
313 __acquires(rq->lock)
314{
315 struct rq *rq;
316
317 lockdep_assert_held(&p->pi_lock);
318
319 for (;;) {
320 rq = task_rq(p);
321 raw_spin_lock(&rq->lock);
322 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
323 return rq;
324 raw_spin_unlock(&rq->lock);
325
326 while (unlikely(task_on_rq_migrating(p)))
327 cpu_relax();
328 }
329}
330
331/*
332 * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
333 */
334static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
335 __acquires(p->pi_lock)
336 __acquires(rq->lock)
337{
338 struct rq *rq;
339
340 for (;;) {
341 raw_spin_lock_irqsave(&p->pi_lock, *flags);
342 rq = task_rq(p);
343 raw_spin_lock(&rq->lock);
344 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
345 return rq;
346 raw_spin_unlock(&rq->lock);
347 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
348
349 while (unlikely(task_on_rq_migrating(p)))
350 cpu_relax();
351 }
352}
353
354static void __task_rq_unlock(struct rq *rq)
355 __releases(rq->lock)
356{
357 raw_spin_unlock(&rq->lock);
358}
359
360static inline void
361task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
362 __releases(rq->lock)
363 __releases(p->pi_lock)
364{
365 raw_spin_unlock(&rq->lock);
366 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
367}
368
369/*
370 * this_rq_lock - lock this runqueue and disable interrupts. 310 * this_rq_lock - lock this runqueue and disable interrupts.
371 */ 311 */
372static struct rq *this_rq_lock(void) 312static struct rq *this_rq_lock(void)
@@ -2899,7 +2839,7 @@ void __sched schedule_preempt_disabled(void)
2899 preempt_disable(); 2839 preempt_disable();
2900} 2840}
2901 2841
2902static void preempt_schedule_common(void) 2842static void __sched notrace preempt_schedule_common(void)
2903{ 2843{
2904 do { 2844 do {
2905 __preempt_count_add(PREEMPT_ACTIVE); 2845 __preempt_count_add(PREEMPT_ACTIVE);
@@ -4418,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to);
4418 * This task is about to go to sleep on IO. Increment rq->nr_iowait so 4358 * This task is about to go to sleep on IO. Increment rq->nr_iowait so
4419 * that process accounting knows that this is a task in IO wait state. 4359 * that process accounting knows that this is a task in IO wait state.
4420 */ 4360 */
4421void __sched io_schedule(void)
4422{
4423 struct rq *rq = raw_rq();
4424
4425 delayacct_blkio_start();
4426 atomic_inc(&rq->nr_iowait);
4427 blk_flush_plug(current);
4428 current->in_iowait = 1;
4429 schedule();
4430 current->in_iowait = 0;
4431 atomic_dec(&rq->nr_iowait);
4432 delayacct_blkio_end();
4433}
4434EXPORT_SYMBOL(io_schedule);
4435
4436long __sched io_schedule_timeout(long timeout) 4361long __sched io_schedule_timeout(long timeout)
4437{ 4362{
4438 struct rq *rq = raw_rq(); 4363 int old_iowait = current->in_iowait;
4364 struct rq *rq;
4439 long ret; 4365 long ret;
4440 4366
4367 current->in_iowait = 1;
4368 if (old_iowait)
4369 blk_schedule_flush_plug(current);
4370 else
4371 blk_flush_plug(current);
4372
4441 delayacct_blkio_start(); 4373 delayacct_blkio_start();
4374 rq = raw_rq();
4442 atomic_inc(&rq->nr_iowait); 4375 atomic_inc(&rq->nr_iowait);
4443 blk_flush_plug(current);
4444 current->in_iowait = 1;
4445 ret = schedule_timeout(timeout); 4376 ret = schedule_timeout(timeout);
4446 current->in_iowait = 0; 4377 current->in_iowait = old_iowait;
4447 atomic_dec(&rq->nr_iowait); 4378 atomic_dec(&rq->nr_iowait);
4448 delayacct_blkio_end(); 4379 delayacct_blkio_end();
4380
4449 return ret; 4381 return ret;
4450} 4382}
4383EXPORT_SYMBOL(io_schedule_timeout);
4451 4384
4452/** 4385/**
4453 * sys_sched_get_priority_max - return maximum RT priority. 4386 * sys_sched_get_priority_max - return maximum RT priority.
@@ -7642,6 +7575,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
7642{ 7575{
7643 struct task_struct *g, *p; 7576 struct task_struct *g, *p;
7644 7577
7578 /*
7579 * Autogroups do not have RT tasks; see autogroup_create().
7580 */
7581 if (task_group_is_autogroup(tg))
7582 return 0;
7583
7645 for_each_process_thread(g, p) { 7584 for_each_process_thread(g, p) {
7646 if (rt_task(p) && task_group(p) == tg) 7585 if (rt_task(p) && task_group(p) == tg)
7647 return 1; 7586 return 1;
@@ -7734,6 +7673,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
7734{ 7673{
7735 int i, err = 0; 7674 int i, err = 0;
7736 7675
7676 /*
7677 * Disallowing the root group RT runtime is BAD, it would disallow the
7678 * kernel creating (and or operating) RT threads.
7679 */
7680 if (tg == &root_task_group && rt_runtime == 0)
7681 return -EINVAL;
7682
7683 /* No period doesn't make any sense. */
7684 if (rt_period == 0)
7685 return -EINVAL;
7686
7737 mutex_lock(&rt_constraints_mutex); 7687 mutex_lock(&rt_constraints_mutex);
7738 read_lock(&tasklist_lock); 7688 read_lock(&tasklist_lock);
7739 err = __rt_schedulable(tg, rt_period, rt_runtime); 7689 err = __rt_schedulable(tg, rt_period, rt_runtime);
@@ -7790,9 +7740,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
7790 rt_period = (u64)rt_period_us * NSEC_PER_USEC; 7740 rt_period = (u64)rt_period_us * NSEC_PER_USEC;
7791 rt_runtime = tg->rt_bandwidth.rt_runtime; 7741 rt_runtime = tg->rt_bandwidth.rt_runtime;
7792 7742
7793 if (rt_period == 0)
7794 return -EINVAL;
7795
7796 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); 7743 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
7797} 7744}
7798 7745
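One detail of the io_schedule_timeout() rework above is that current->in_iowait is saved and restored rather than blindly cleared, which makes the helper safe to call while an outer io wait is already in progress. The save/restore pattern in isolation, with a plain variable standing in for current->in_iowait:

#include <stdio.h>

static int in_iowait;		/* stands in for current->in_iowait */

static void io_wait(void)
{
	int old = in_iowait;	/* save */

	in_iowait = 1;
	/* ... sleep; a nested caller sees in_iowait already set ... */
	in_iowait = old;	/* restore, don't clear */
}

int main(void)
{
	in_iowait = 1;		/* simulate an outer io wait in progress */
	io_wait();
	printf("in_iowait after nested call: %d\n", in_iowait);	/* 1 */
	return 0;
}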
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a027799ae130..3fa8fa6d9403 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
511 struct sched_dl_entity, 511 struct sched_dl_entity,
512 dl_timer); 512 dl_timer);
513 struct task_struct *p = dl_task_of(dl_se); 513 struct task_struct *p = dl_task_of(dl_se);
514 unsigned long flags;
514 struct rq *rq; 515 struct rq *rq;
515again:
516 rq = task_rq(p);
517 raw_spin_lock(&rq->lock);
518 516
519 if (rq != task_rq(p)) { 517 rq = task_rq_lock(current, &flags);
520 /* Task was moved, retrying. */
521 raw_spin_unlock(&rq->lock);
522 goto again;
523 }
524 518
525 /* 519 /*
526 * We need to take care of several possible races here: 520 * We need to take care of several possible races here:
@@ -541,6 +535,26 @@ again:
541 535
542 sched_clock_tick(); 536 sched_clock_tick();
543 update_rq_clock(rq); 537 update_rq_clock(rq);
538
539 /*
540 * If the throttle happened during sched-out; like:
541 *
542 * schedule()
543 * deactivate_task()
544 * dequeue_task_dl()
545 * update_curr_dl()
546 * start_dl_timer()
547 * __dequeue_task_dl()
548 * prev->on_rq = 0;
549 *
550 * We can be both throttled and !queued. Replenish the counter
551 * but do not enqueue -- wait for our wakeup to do that.
552 */
553 if (!task_on_rq_queued(p)) {
554 replenish_dl_entity(dl_se, dl_se);
555 goto unlock;
556 }
557
544 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); 558 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
545 if (dl_task(rq->curr)) 559 if (dl_task(rq->curr))
546 check_preempt_curr_dl(rq, p, 0); 560 check_preempt_curr_dl(rq, p, 0);
@@ -555,7 +569,7 @@ again:
555 push_dl_task(rq); 569 push_dl_task(rq);
556#endif 570#endif
557unlock: 571unlock:
558 raw_spin_unlock(&rq->lock); 572 task_rq_unlock(rq, current, &flags);
559 573
560 return HRTIMER_NORESTART; 574 return HRTIMER_NORESTART;
561} 575}
@@ -898,6 +912,7 @@ static void yield_task_dl(struct rq *rq)
898 rq->curr->dl.dl_yielded = 1; 912 rq->curr->dl.dl_yielded = 1;
899 p->dl.runtime = 0; 913 p->dl.runtime = 0;
900 } 914 }
915 update_rq_clock(rq);
901 update_curr_dl(rq); 916 update_curr_dl(rq);
902} 917}
903 918
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0870db23d79c..dc0f435a2779 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1380,6 +1380,82 @@ static inline void sched_avg_update(struct rq *rq) { }
1380 1380
1381extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period); 1381extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
1382 1382
1383/*
1384 * __task_rq_lock - lock the rq @p resides on.
1385 */
1386static inline struct rq *__task_rq_lock(struct task_struct *p)
1387 __acquires(rq->lock)
1388{
1389 struct rq *rq;
1390
1391 lockdep_assert_held(&p->pi_lock);
1392
1393 for (;;) {
1394 rq = task_rq(p);
1395 raw_spin_lock(&rq->lock);
1396 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
1397 return rq;
1398 raw_spin_unlock(&rq->lock);
1399
1400 while (unlikely(task_on_rq_migrating(p)))
1401 cpu_relax();
1402 }
1403}
1404
1405/*
1406 * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
1407 */
1408static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
1409 __acquires(p->pi_lock)
1410 __acquires(rq->lock)
1411{
1412 struct rq *rq;
1413
1414 for (;;) {
1415 raw_spin_lock_irqsave(&p->pi_lock, *flags);
1416 rq = task_rq(p);
1417 raw_spin_lock(&rq->lock);
1418 /*
1419 * move_queued_task() task_rq_lock()
1420 *
1421 * ACQUIRE (rq->lock)
1422 * [S] ->on_rq = MIGRATING [L] rq = task_rq()
1423 * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
1424 * [S] ->cpu = new_cpu [L] task_rq()
1425 * [L] ->on_rq
1426 * RELEASE (rq->lock)
1427 *
1428 * If we observe the old cpu in task_rq_lock, the acquire of
1429 * the old rq->lock will fully serialize against the stores.
1430 *
1431 * If we observe the new cpu in task_rq_lock, the acquire will
1432 * pair with the WMB to ensure we must then also see migrating.
1433 */
1434 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
1435 return rq;
1436 raw_spin_unlock(&rq->lock);
1437 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
1438
1439 while (unlikely(task_on_rq_migrating(p)))
1440 cpu_relax();
1441 }
1442}
1443
1444static inline void __task_rq_unlock(struct rq *rq)
1445 __releases(rq->lock)
1446{
1447 raw_spin_unlock(&rq->lock);
1448}
1449
1450static inline void
1451task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
1452 __releases(rq->lock)
1453 __releases(p->pi_lock)
1454{
1455 raw_spin_unlock(&rq->lock);
1456 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
1457}
1458
1383#ifdef CONFIG_SMP 1459#ifdef CONFIG_SMP
1384#ifdef CONFIG_PREEMPT 1460#ifdef CONFIG_PREEMPT
1385 1461
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 4b585e0fdd22..0f60b08a4f07 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -633,10 +633,14 @@ int ntp_validate_timex(struct timex *txc)
633 if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) 633 if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
634 return -EPERM; 634 return -EPERM;
635 635
636 if (txc->modes & ADJ_FREQUENCY) { 636 /*
637 if (LONG_MIN / PPM_SCALE > txc->freq) 637 * Check for potential multiplication overflows that can
638 * only happen on 64-bit systems:
639 */
640 if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
641 if (LLONG_MIN / PPM_SCALE > txc->freq)
638 return -EINVAL; 642 return -EINVAL;
639 if (LONG_MAX / PPM_SCALE < txc->freq) 643 if (LLONG_MAX / PPM_SCALE < txc->freq)
640 return -EINVAL; 644 return -EINVAL;
641 } 645 }
642 646
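The ntp change closes a 64-bit-only hole: txc->freq is a long, so on 64-bit systems the freq * PPM_SCALE product must be range-checked against long long limits before it is computed, and dividing the limit by the scale is the overflow-free way to do that. The guard in isolation — PPM_SCALE's real kernel value differs; 65536 here is an illustrative stand-in:

#include <limits.h>
#include <stdio.h>

#define PPM_SCALE 65536		/* stand-in; not the kernel's value */

static int freq_in_range(long long freq)
{
	if (LLONG_MIN / PPM_SCALE > freq)
		return 0;
	if (LLONG_MAX / PPM_SCALE < freq)
		return 0;
	return 1;		/* freq * PPM_SCALE cannot overflow */
}

int main(void)
{
	printf("%d %d\n", freq_in_range(1000),
	       freq_in_range(LLONG_MAX / 2));	/* prints: 1 0 */
	return 0;
}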